author     Dave Airlie <airlied@redhat.com>    2020-09-09 07:53:59 +1000
committer  Dave Airlie <airlied@redhat.com>    2020-09-09 07:55:22 +1000
commit     1f4b2aca794f7aeb918ed5f0d7221d68a81d6b43 (patch)
tree       e7047fddc2e409049ca2748804fbe758dd780950 /drivers/gpu/drm
parent     61d98185b41c5ddc442fcfef7dd5a7b289ef69f5 (diff)
parent     e0ee152fce25dc9269c7ea5280c98aa4b3682759 (diff)
Merge tag 'drm-intel-gt-next-2020-09-07' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
(Same content as drm-intel-gt-next-2020-09-04-3, S-o-b's added)

UAPI Changes:
(- Potential implicit changes from WW locking refactoring)

Cross-subsystem Changes:
(- WW locking changes should align the i915 locking more with others)

Driver Changes:

- MAJOR: Apply WW locking across the driver (Maarten)
- Reverts for 5 commits to make applying WW locking faster (Maarten)
- Disable preparser around invalidations on Tigerlake for non-RCS engines (Chris)
- Add missing dma_fence_put() for error case of syncobj timeline (Chris)
- Parse command buffer earlier in eb_relocate(slow) to facilitate backoff (Maarten)
- Pin engine before pinning all objects (Maarten)
- Rework intel_context pinning to do everything outside of pin_mutex (Maarten)
- Avoid tracking GEM context until registered (Cc: stable, Chris)
- Provide a fastpath for waiting on vma bindings (Chris)
- Fixes to preempt-to-busy mechanism (Chris)
- Distinguish the virtual breadcrumbs from the irq breadcrumbs (Chris)
- Switch to object allocations for page directories (Chris)
- Hold context/request reference while breadcrumbs are active (Chris)
- Make sure execbuffer always passes ww state to i915_vma_pin (Maarten)
- Code refactoring to facilitate use of WW locking (Maarten)
- Locking refactoring to use more granular locking (Maarten, Chris)
- Support for multiple pinned timelines per engine (Chris)
- Move complication of I915_GEM_THROTTLE to the ioctl from general code (Chris)
- Make active tracking/vma page-directory stash work preallocated (Chris)
- Avoid flushing submission tasklet too often (Chris)
- Reduce context termination list iteration guard to RCU (Chris)
- Reductions to locking contention (Chris)
- Fixes for issues found by CI (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <jlahtine@jlahtine-mobl.ger.corp.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200907130039.GA27766@jlahtine-mobl.ger.corp.intel.com
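For reference, the recurring structure this series applies is the ww (wait/wound) acquire/backoff loop, visible in the hunks below (e.g. pin_wait_clear_pages_work(), context_barrier_task(), i915_gem_object_pin_to_display_plane()). The following is only a minimal sketch of that loop using the helpers exercised in this diff; do_work() is a hypothetical placeholder for whatever pinning or domain work sits inside the locked region, not a function from the patch.

	/*
	 * Sketch of the ww acquire/backoff pattern used throughout this series.
	 * On -EDEADLK all locks tracked by the ww context are dropped, the
	 * contended lock is waited for, and the whole sequence is retried.
	 */
	static int example_ww_locked_op(struct drm_i915_gem_object *obj)
	{
		struct i915_gem_ww_ctx ww;
		int err;

		i915_gem_ww_ctx_init(&ww, true);	/* true = interruptible */
	retry:
		err = i915_gem_object_lock(obj, &ww);	/* may return -EDEADLK */
		if (err)
			goto out;

		err = do_work(obj);	/* placeholder: pin, set domain, submit, ... */

	out:
		if (err == -EDEADLK) {
			err = i915_gem_ww_ctx_backoff(&ww);
			if (!err)
				goto retry;
		}
		i915_gem_ww_ctx_fini(&ww);
		return err;
	}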
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_client_blt.c | 89
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 105
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 80
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 1249
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c | 51
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h | 40
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 152
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 10
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 67
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 9
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 50
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 144
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c | 75
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 45
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 106
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.h | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 181
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 305
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs.h | 36
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 47
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c | 318
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.h | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context_types.h | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h | 20
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 34
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h | 31
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c | 97
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c | 23
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c | 103
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c | 300
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h | 142
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 182
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ppgtt.c | 150
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_renderstate.c | 73
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_renderstate.h | 9
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c | 42
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.c | 28
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.h | 24
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c | 43
-rw-r--r--  drivers/gpu/drm/i915/gt/mock_engine.c | 30
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_context.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_lrc.c | 22
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rps.c | 30
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_timeline.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gvt/cmd_parser.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.c | 17
-rw-r--r--  drivers/gpu/drm/i915/i915_active.c | 237
-rw-r--r--  drivers/gpu/drm/i915/i915_active.h | 31
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 24
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 107
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.h | 12
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c | 57
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c | 224
-rw-r--r--  drivers/gpu/drm/i915/i915_request.h | 8
-rw-r--r--  drivers/gpu/drm/i915/i915_sw_fence.c | 10
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.c | 65
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.h | 13
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem.c | 41
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 75
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_perf.c | 4
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_request.c | 18
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_vma.c | 2
-rw-r--r--  drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_gtt.c | 26
82 files changed, 3434 insertions, 2178 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d60efb8f4ba3..dc622af8695c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2311,7 +2311,7 @@ err:
void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
{
- i915_gem_object_lock(vma->obj);
+ i915_gem_object_lock(vma->obj, NULL);
if (flags & PLANE_HAS_FENCE)
i915_vma_unpin_fence(vma);
i915_gem_object_unpin_from_display_plane(vma);
@@ -3451,7 +3451,7 @@ initial_plane_vma(struct drm_i915_private *i915,
if (IS_ERR(vma))
goto err_obj;
- if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+ if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
goto err_obj;
if (i915_gem_object_is_tiled(obj) &&
@@ -17194,7 +17194,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
if (!intel_fb->frontbuffer)
return -ENOMEM;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
tiling = i915_gem_object_get_tiling(obj);
stride = i915_gem_object_get_stride(obj);
i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 278664f831e7..272cf3ea68d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
-static int vma_bind(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void vma_bind(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
- return vm->vma_ops.bind_vma(vm, vma, cache_level, flags);
+ vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags);
}
static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
@@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work)
struct clear_pages_work *w = container_of(work, typeof(*w), work);
struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
struct i915_vma *vma = w->sleeve->vma;
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
int err = w->dma.error;
@@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work)
obj->read_domains = I915_GEM_GPU_DOMAINS;
obj->write_domain = 0;
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (unlikely(err))
+ i915_gem_ww_ctx_init(&ww, false);
+ intel_engine_pm_get(w->ce->engine);
+retry:
+ err = intel_context_pin_ww(w->ce, &ww);
+ if (err)
goto out_signal;
- batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+ batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto out_unpin;
+ goto out_ctx;
}
- rq = intel_context_create_request(w->ce);
+ rq = i915_request_create(w->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -224,9 +229,19 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(w->ce, batch);
-out_unpin:
- i915_vma_unpin(vma);
+out_ctx:
+ intel_context_unpin(w->ce);
out_signal:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ i915_vma_unpin(w->sleeve->vma);
+ intel_engine_pm_put(w->ce->engine);
+
if (unlikely(err)) {
dma_fence_set_error(&w->dma, err);
dma_fence_signal(&w->dma);
@@ -234,6 +249,44 @@ out_signal:
}
}
+static int pin_wait_clear_pages_work(struct clear_pages_work *w,
+ struct intel_context *ce)
+{
+ struct i915_vma *vma = w->sleeve->vma;
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(vma->obj, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out;
+
+ err = i915_sw_fence_await_reservation(&w->wait,
+ vma->obj->base.resv, NULL,
+ true, 0, I915_FENCE_GFP);
+ if (err)
+ goto err_unpin_vma;
+
+ dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
+
+err_unpin_vma:
+ if (err)
+ i915_vma_unpin(vma);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ return err;
+}
+
static int __i915_sw_fence_call
clear_pages_work_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
@@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
- i915_gem_object_lock(obj);
- err = i915_sw_fence_await_reservation(&work->wait,
- obj->base.resv, NULL, true, 0,
- I915_FENCE_GFP);
- if (err < 0) {
+ err = pin_wait_clear_pages_work(work, ce);
+ if (err < 0)
dma_fence_set_error(&work->dma, err);
- } else {
- dma_resv_add_excl_fence(obj->base.resv, &work->dma);
- err = 0;
- }
- i915_gem_object_unlock(obj);
dma_fence_get(&work->dma);
i915_sw_fence_commit(&work->wait);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d0bdb6d447ed..cf5ecbde9e06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
return __reset_engine(engine);
}
-static struct intel_engine_cs *__active_engine(struct i915_request *rq)
+static bool
+__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
{
struct intel_engine_cs *engine, *locked;
+ bool ret = false;
/*
* Serialise with __i915_request_submit() so that it sees
* is-banned?, or we know the request is already inflight.
+ *
+ * Note that rq->engine is unstable, and so we double
+ * check that we have acquired the lock on the final engine.
*/
locked = READ_ONCE(rq->engine);
spin_lock_irq(&locked->active.lock);
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
- spin_lock(&engine->active.lock);
locked = engine;
+ spin_lock(&locked->active.lock);
}
- engine = NULL;
- if (i915_request_is_active(rq) && rq->fence.error != -EIO)
- engine = rq->engine;
+ if (!i915_request_completed(rq)) {
+ if (i915_request_is_active(rq) && rq->fence.error != -EIO)
+ *active = locked;
+ ret = true;
+ }
spin_unlock_irq(&locked->active.lock);
- return engine;
+ return ret;
}
static struct intel_engine_cs *active_engine(struct intel_context *ce)
@@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (!ce->timeline)
return NULL;
- mutex_lock(&ce->timeline->mutex);
- list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
- if (i915_request_completed(rq))
- break;
+ rcu_read_lock();
+ list_for_each_entry_rcu(rq, &ce->timeline->requests, link) {
+ if (i915_request_is_active(rq) && i915_request_completed(rq))
+ continue;
/* Check with the backend if the request is inflight */
- engine = __active_engine(rq);
- if (engine)
+ if (__active_engine(rq, &engine))
break;
}
- mutex_unlock(&ce->timeline->mutex);
+ rcu_read_unlock();
return engine;
}
@@ -713,6 +719,7 @@ __create_context(struct drm_i915_private *i915)
ctx->i915 = i915;
ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
mutex_init(&ctx->mutex);
+ INIT_LIST_HEAD(&ctx->link);
spin_lock_init(&ctx->stale.lock);
INIT_LIST_HEAD(&ctx->stale.engines);
@@ -740,10 +747,6 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
- spin_lock(&i915->gem.contexts.lock);
- list_add_tail(&ctx->link, &i915->gem.contexts.list);
- spin_unlock(&i915->gem.contexts.lock);
-
return ctx;
err_free:
@@ -889,7 +892,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
struct intel_timeline *timeline;
- timeline = intel_timeline_create(&i915->gt, NULL);
+ timeline = intel_timeline_create(&i915->gt);
if (IS_ERR(timeline)) {
context_close(ctx);
return ERR_CAST(timeline);
@@ -931,6 +934,7 @@ static int gem_context_register(struct i915_gem_context *ctx,
struct drm_i915_file_private *fpriv,
u32 *id)
{
+ struct drm_i915_private *i915 = ctx->i915;
struct i915_address_space *vm;
int ret;
@@ -949,8 +953,16 @@ static int gem_context_register(struct i915_gem_context *ctx,
/* And finally expose ourselves to userspace via the idr */
ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
if (ret)
- put_pid(fetch_and_zero(&ctx->pid));
+ goto err_pid;
+
+ spin_lock(&i915->gem.contexts.lock);
+ list_add_tail(&ctx->link, &i915->gem.contexts.list);
+ spin_unlock(&i915->gem.contexts.lock);
+ return 0;
+
+err_pid:
+ put_pid(fetch_and_zero(&ctx->pid));
return ret;
}
@@ -1094,6 +1106,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
bool (*skip)(struct intel_context *ce, void *data),
+ int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
int (*emit)(struct i915_request *rq, void *data),
void (*task)(void *data),
void *data)
@@ -1101,6 +1114,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
+ struct i915_gem_ww_ctx ww;
struct intel_context *ce;
int err = 0;
@@ -1138,10 +1152,21 @@ static int context_barrier_task(struct i915_gem_context *ctx,
if (skip && skip(ce, data))
continue;
- rq = intel_context_create_request(ce);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto err;
+
+ if (pin)
+ err = pin(ce, &ww, data);
+ if (err)
+ goto err_unpin;
+
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- break;
+ goto err_unpin;
}
err = 0;
@@ -1151,6 +1176,16 @@ static int context_barrier_task(struct i915_gem_context *ctx,
err = i915_active_add_request(&cb->base, rq);
i915_request_add(rq);
+err_unpin:
+ intel_context_unpin(ce);
+err:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
if (err)
break;
}
@@ -1206,6 +1241,17 @@ static void set_ppgtt_barrier(void *data)
i915_vm_close(old);
}
+static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
+{
+ struct i915_address_space *vm = ce->vm;
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
+ /* ppGTT is not part of the legacy context image */
+ return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
+
+ return 0;
+}
+
static int emit_ppgtt_update(struct i915_request *rq, void *data)
{
struct i915_address_space *vm = rq->context->vm;
@@ -1262,20 +1308,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
static bool skip_ppgtt_update(struct intel_context *ce, void *data)
{
- if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
- return true;
-
if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
- return false;
-
- if (!atomic_read(&ce->pin_count))
- return true;
-
- /* ppGTT is not part of the legacy context image */
- if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
- return true;
-
- return false;
+ return !ce->state;
+ else
+ return !atomic_read(&ce->pin_count);
}
static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1326,6 +1362,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
*/
err = context_barrier_task(ctx, ALL_ENGINES,
skip_ppgtt_update,
+ pin_ppgtt_update,
emit_ppgtt_update,
set_ppgtt_barrier,
old);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 2679380159fc..27fddc22a7c6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
if (err)
return err;
- err = i915_gem_object_lock_interruptible(obj);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
@@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
if (err)
return err;
- err = i915_gem_object_lock_interruptible(obj);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 7f76fc68f498..7c90a63c273d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
if (!i915_gem_object_is_framebuffer(obj))
return;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
__i915_gem_object_flush_for_display(obj);
i915_gem_object_unlock(obj);
}
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
+{
+ if (i915_gem_object_is_framebuffer(obj))
+ __i915_gem_object_flush_for_display(obj);
+}
+
/**
* Moves a single object to the WC read, and possibly write domain.
* @obj: object to act on
@@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- return ret;
-
/* Always invalidate stale cachelines */
if (obj->cache_level != cache_level) {
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true;
}
- i915_gem_object_unlock(obj);
-
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
I915_GEM_OBJECT_UNBIND_ACTIVE |
@@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
goto out;
}
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (ret)
+ goto out;
+
ret = i915_gem_object_set_cache_level(obj, level);
+ i915_gem_object_unlock(obj);
out:
i915_gem_object_put(obj);
@@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
unsigned int flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_gem_ww_ctx ww;
struct i915_vma *vma;
int ret;
@@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
return ERR_PTR(-EINVAL);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ ret = i915_gem_object_lock(obj, &ww);
+ if (ret)
+ goto err;
/*
* The display engine is not coherent with the LLC cache on gen6. As
* a result, we make sure that the pinning that is about to occur is
@@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
HAS_WT(i915) ?
I915_CACHE_WT : I915_CACHE_NONE);
if (ret)
- return ERR_PTR(ret);
+ goto err;
/*
* As the user may map the buffer once pinned in the display plane
@@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
vma = ERR_PTR(-ENOSPC);
if ((flags & PIN_MAPPABLE) == 0 &&
(!view || view->type == I915_GGTT_VIEW_NORMAL))
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
- flags |
- PIN_MAPPABLE |
- PIN_NONBLOCK);
- if (IS_ERR(vma))
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
- if (IS_ERR(vma))
- return vma;
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
+ flags | PIN_MAPPABLE |
+ PIN_NONBLOCK);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
+ alignment, flags);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err;
+ }
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
- i915_gem_object_flush_if_display(obj);
+ i915_gem_object_flush_if_display_locked(obj);
+
+err:
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ if (ret)
+ return ERR_PTR(ret);
return vma;
}
@@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (err)
goto out;
- err = i915_gem_object_lock_interruptible(obj);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out_unpin;
@@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- return ret;
+ assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (ret)
- goto err_unlock;
+ return ret;
ret = i915_gem_object_pin_pages(obj);
if (ret)
- goto err_unlock;
+ return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -616,8 +638,6 @@ out:
err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
- i915_gem_object_unlock(obj);
return ret;
}
@@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- return ret;
+ assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (ret)
- goto err_unlock;
+ return ret;
ret = i915_gem_object_pin_pages(obj);
if (ret)
- goto err_unlock;
+ return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -680,7 +698,5 @@ out:
err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
- i915_gem_object_unlock(obj);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 322642fb765f..804339255df1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -41,11 +41,6 @@ struct eb_vma {
u32 handle;
};
-struct eb_vma_array {
- struct kref kref;
- struct eb_vma vma[];
-};
-
enum {
FORCE_CPU_RELOC = 1,
FORCE_GTT_RELOC,
@@ -58,9 +53,11 @@ enum {
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
+#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
-#define __EXEC_INTERNAL_FLAGS (~0u << 31)
+#define __EXEC_ENGINE_PINNED BIT(30)
+#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
@@ -261,6 +258,8 @@ struct i915_execbuffer {
/** list of vma that have execobj.relocation_count */
struct list_head relocs;
+ struct i915_gem_ww_ctx ww;
+
/**
* Track the most recently used object for relocations, as we
* frequently have to perform multiple relocations within the same
@@ -276,19 +275,22 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
- struct i915_vma *target;
struct i915_request *rq;
- struct i915_vma *rq_vma;
u32 *rq_cmd;
unsigned int rq_size;
+ struct intel_gt_buffer_pool_node *pool;
} reloc_cache;
+ struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+ struct intel_context *reloc_context;
+
u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */
u32 batch_start_offset; /** Location within object of batch */
u32 batch_len; /** Length of batch within object */
u32 batch_flags; /** Flags composed for emit_bb_start() */
+ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
/**
* Indicate either the size of the hastable used to resolve
@@ -297,12 +299,16 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
- struct eb_vma_array *array;
struct eb_fence *fences;
unsigned long num_fences;
};
+static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+ bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
+
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -310,62 +316,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
eb->args->batch_len);
}
-static struct eb_vma_array *eb_vma_array_create(unsigned int count)
-{
- struct eb_vma_array *arr;
-
- arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
- if (!arr)
- return NULL;
-
- kref_init(&arr->kref);
- arr->vma[0].vma = NULL;
-
- return arr;
-}
-
-static inline void eb_unreserve_vma(struct eb_vma *ev)
-{
- struct i915_vma *vma = ev->vma;
-
- if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
- __i915_vma_unpin_fence(vma);
-
- if (ev->flags & __EXEC_OBJECT_HAS_PIN)
- __i915_vma_unpin(vma);
-
- ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
- __EXEC_OBJECT_HAS_FENCE);
-}
-
-static void eb_vma_array_destroy(struct kref *kref)
-{
- struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
- struct eb_vma *ev = arr->vma;
-
- while (ev->vma) {
- eb_unreserve_vma(ev);
- i915_vma_put(ev->vma);
- ev++;
- }
-
- kvfree(arr);
-}
-
-static void eb_vma_array_put(struct eb_vma_array *arr)
-{
- kref_put(&arr->kref, eb_vma_array_destroy);
-}
-
static int eb_create(struct i915_execbuffer *eb)
{
- /* Allocate an extra slot for use by the command parser + sentinel */
- eb->array = eb_vma_array_create(eb->buffer_count + 2);
- if (!eb->array)
- return -ENOMEM;
-
- eb->vma = eb->array->vma;
-
if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
unsigned int size = 1 + ilog2(eb->buffer_count);
@@ -399,10 +351,8 @@ static int eb_create(struct i915_execbuffer *eb)
break;
} while (--size);
- if (unlikely(!size)) {
- eb_vma_array_put(eb->array);
+ if (unlikely(!size))
return -ENOMEM;
- }
eb->lut_size = size;
} else {
@@ -486,16 +436,17 @@ eb_pin_vma(struct i915_execbuffer *eb,
pin_flags |= PIN_GLOBAL;
/* Attempt to reuse the current location if available */
- if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+ /* TODO: Add -EDEADLK handling here */
+ if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
if (entry->flags & EXEC_OBJECT_PINNED)
return false;
/* Failing that pick any _free_ space if suitable */
- if (unlikely(i915_vma_pin(vma,
- entry->pad_to_size,
- entry->alignment,
- eb_pin_flags(entry, ev->flags) |
- PIN_USER | PIN_NOEVICT)))
+ if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT)))
return false;
}
@@ -513,6 +464,19 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
}
+static inline void
+eb_unreserve_vma(struct eb_vma *ev)
+{
+ if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
+ return;
+
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(ev->vma);
+
+ __i915_vma_unpin(ev->vma);
+ ev->flags &= ~__EXEC_OBJECT_RESERVED;
+}
+
static int
eb_validate_vma(struct i915_execbuffer *eb,
struct drm_i915_gem_exec_object2 *entry,
@@ -604,16 +568,6 @@ eb_add_vma(struct i915_execbuffer *eb,
eb->batch = ev;
}
-
- if (eb_pin_vma(eb, entry, ev)) {
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start | UPDATE;
- eb->args->flags |= __EXEC_HAS_RELOC;
- }
- } else {
- eb_unreserve_vma(ev);
- list_add_tail(&ev->bind_link, &eb->unbound);
- }
}
static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -633,7 +587,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
obj->cache_level != I915_CACHE_NONE);
}
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
+static int eb_reserve_vma(struct i915_execbuffer *eb,
struct eb_vma *ev,
u64 pin_flags)
{
@@ -648,7 +602,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return err;
}
- err = i915_vma_pin(vma,
+ err = i915_vma_pin_ww(vma, &eb->ww,
entry->pad_to_size, entry->alignment,
eb_pin_flags(entry, ev->flags) | pin_flags);
if (err)
@@ -698,10 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
* This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*/
-
- if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
- return -EINTR;
-
pass = 0;
do {
list_for_each_entry(ev, &eb->unbound, bind_link) {
@@ -709,8 +659,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
if (err)
break;
}
- if (!(err == -ENOSPC || err == -EAGAIN))
- break;
+ if (err != -ENOSPC)
+ return err;
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
@@ -740,13 +690,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
}
list_splice_tail(&last, &eb->unbound);
- if (err == -EAGAIN) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
- flush_workqueue(eb->i915->mm.userptr_wq);
- mutex_lock(&eb->i915->drm.struct_mutex);
- continue;
- }
-
switch (pass++) {
case 0:
break;
@@ -757,20 +700,15 @@ static int eb_reserve(struct i915_execbuffer *eb)
err = i915_gem_evict_vm(eb->context->vm);
mutex_unlock(&eb->context->vm->mutex);
if (err)
- goto unlock;
+ return err;
break;
default:
- err = -ENOSPC;
- goto unlock;
+ return -ENOSPC;
}
pin_flags = PIN_USER;
} while (1);
-
-unlock:
- mutex_unlock(&eb->i915->drm.struct_mutex);
- return err;
}
static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
@@ -893,12 +831,12 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
+ struct drm_i915_private *i915 = eb->i915;
unsigned int batch = eb_batch_index(eb);
unsigned int i;
int err = 0;
INIT_LIST_HEAD(&eb->relocs);
- INIT_LIST_HEAD(&eb->unbound);
for (i = 0; i < eb->buffer_count; i++) {
struct i915_vma *vma;
@@ -906,22 +844,83 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
vma = eb_lookup_vma(eb, eb->exec[i].handle);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- break;
+ goto err;
}
err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err)) {
i915_vma_put(vma);
- break;
+ goto err;
}
eb_add_vma(eb, i, batch, vma);
}
+ if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
+ drm_dbg(&i915->drm,
+ "Attempting to use self-modifying batch buffer\n");
+ return -EINVAL;
+ }
+
+ if (range_overflows_t(u64,
+ eb->batch_start_offset, eb->batch_len,
+ eb->batch->vma->size)) {
+ drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+ return -EINVAL;
+ }
+
+ if (eb->batch_len == 0)
+ eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+
+ return 0;
+
+err:
eb->vma[i].vma = NULL;
return err;
}
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ INIT_LIST_HEAD(&eb->unbound);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ err = i915_gem_object_lock(vma->obj, &eb->ww);
+ if (err)
+ return err;
+
+ if (eb_pin_vma(eb, entry, ev)) {
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+ } else {
+ eb_unreserve_vma(ev);
+
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ if (drm_mm_node_allocated(&vma->node)) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+ }
+
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
+ }
+
+ if (!list_empty(&eb->unbound))
+ return eb_reserve(eb);
+
+ return 0;
+}
+
static struct eb_vma *
eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
{
@@ -942,13 +941,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ if (!vma)
+ break;
+
+ eb_unreserve_vma(ev);
+
+ if (final)
+ i915_vma_put(vma);
+ }
+
+ eb_unpin_engine(eb);
+}
+
static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
- if (eb->array)
- eb_vma_array_put(eb->array);
-
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -960,6 +977,14 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
return gen8_canonical_addr((int)reloc->delta + target->node.start);
}
+static void reloc_cache_clear(struct reloc_cache *cache)
+{
+ cache->rq = NULL;
+ cache->rq_cmd = NULL;
+ cache->pool = NULL;
+ cache->rq_size = 0;
+}
+
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
@@ -972,8 +997,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- cache->rq = NULL;
- cache->target = NULL;
+ reloc_cache_clear(cache);
}
static inline void *unmask_page(unsigned long p)
@@ -995,132 +1019,60 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-#define RELOC_TAIL 4
-
-static int reloc_gpu_chain(struct reloc_cache *cache)
+static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
- struct intel_gt_buffer_pool_node *pool;
- struct i915_request *rq = cache->rq;
- struct i915_vma *batch;
- u32 *cmd;
- int err;
-
- pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
-
- batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_pool;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
- if (err)
- goto out_pool;
-
- GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
- cmd = cache->rq_cmd + cache->rq_size;
- *cmd++ = MI_ARB_CHECK;
- if (cache->gen >= 8)
- *cmd++ = MI_BATCH_BUFFER_START_GEN8;
- else if (cache->gen >= 6)
- *cmd++ = MI_BATCH_BUFFER_START;
- else
- *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
- *cmd++ = lower_32_bits(batch->node.start);
- *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
- i915_gem_object_flush_map(cache->rq_vma->obj);
- i915_gem_object_unpin_map(cache->rq_vma->obj);
- cache->rq_vma = NULL;
-
- err = intel_gt_buffer_pool_mark_active(pool, rq);
- if (err == 0) {
- i915_vma_lock(batch);
- err = i915_request_await_object(rq, batch->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
- }
- i915_vma_unpin(batch);
- if (err)
- goto out_pool;
-
- cmd = i915_gem_object_pin_map(batch->obj,
- cache->has_llc ?
- I915_MAP_FORCE_WB :
- I915_MAP_FORCE_WC);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto out_pool;
- }
-
- /* Return with batch mapping (cmd) still pinned */
- cache->rq_cmd = cmd;
- cache->rq_size = 0;
- cache->rq_vma = batch;
-
-out_pool:
- intel_gt_buffer_pool_put(pool);
- return err;
-}
+ if (!cache->pool)
+ return;
-static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
-{
- return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+ /*
+ * This is a bit nasty, normally we keep objects locked until the end
+ * of execbuffer, but we already submit this, and have to unlock before
+ * dropping the reference. Fortunately we can only hold 1 pool node at
+ * a time, so this should be harmless.
+ */
+ i915_gem_ww_unlock_single(cache->pool->obj);
+ intel_gt_buffer_pool_put(cache->pool);
+ cache->pool = NULL;
}
-static int reloc_gpu_flush(struct reloc_cache *cache)
+static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
- struct i915_request *rq;
- int err;
+ struct drm_i915_gem_object *obj = cache->rq->batch->obj;
- rq = fetch_and_zero(&cache->rq);
- if (!rq)
- return 0;
-
- if (cache->rq_vma) {
- struct drm_i915_gem_object *obj = cache->rq_vma->obj;
+ GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+ cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
-
- __i915_gem_object_flush_map(obj,
- 0, sizeof(u32) * cache->rq_size);
- i915_gem_object_unpin_map(obj);
- }
+ __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+ i915_gem_object_unpin_map(obj);
- err = 0;
- if (rq->engine->emit_init_breadcrumb)
- err = rq->engine->emit_init_breadcrumb(rq);
- if (!err)
- err = rq->engine->emit_bb_start(rq,
- rq->batch->node.start,
- PAGE_SIZE,
- reloc_bb_flags(cache));
- if (err)
- i915_request_set_error_once(rq, err);
+ intel_gt_chipset_flush(cache->rq->engine->gt);
- intel_gt_chipset_flush(rq->engine->gt);
- i915_request_add(rq);
+ i915_request_add(cache->rq);
+ reloc_cache_put_pool(eb, cache);
+ reloc_cache_clear(cache);
- return err;
+ eb->reloc_pool = NULL;
}
-static void reloc_cache_reset(struct reloc_cache *cache)
+static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
+ if (cache->rq)
+ reloc_gpu_flush(eb, cache);
+
if (!cache->vaddr)
return;
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP) {
+ struct drm_i915_gem_object *obj =
+ (struct drm_i915_gem_object *)cache->node.mm;
if (cache->vaddr & CLFLUSH_AFTER)
mb();
kunmap_atomic(vaddr);
- i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+ i915_gem_object_finish_access(obj);
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1145,9 +1097,10 @@ static void reloc_cache_reset(struct reloc_cache *cache)
static void *reloc_kmap(struct drm_i915_gem_object *obj,
struct reloc_cache *cache,
- unsigned long page)
+ unsigned long pageno)
{
void *vaddr;
+ struct page *page;
if (cache->vaddr) {
kunmap_atomic(unmask_page(cache->vaddr));
@@ -1168,17 +1121,22 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
mb();
}
- vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+ page = i915_gem_object_get_page(obj, pageno);
+ if (!obj->mm.dirty)
+ set_page_dirty(page);
+
+ vaddr = kmap_atomic(page);
cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
- cache->page = page;
+ cache->page = pageno;
return vaddr;
}
static void *reloc_iomap(struct drm_i915_gem_object *obj,
- struct reloc_cache *cache,
+ struct i915_execbuffer *eb,
unsigned long page)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
void *vaddr;
@@ -1196,16 +1154,17 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (use_cpu_reloc(cache, obj))
return NULL;
- i915_gem_object_lock(obj);
err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
if (err)
return ERR_PTR(err);
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (vma == ERR_PTR(-EDEADLK))
+ return vma;
+
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
mutex_lock(&ggtt->vm.mutex);
@@ -1241,9 +1200,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
}
static void *reloc_vaddr(struct drm_i915_gem_object *obj,
- struct reloc_cache *cache,
+ struct i915_execbuffer *eb,
unsigned long page)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
void *vaddr;
if (cache->page == page) {
@@ -1251,7 +1211,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
- vaddr = reloc_iomap(obj, cache, page);
+ vaddr = reloc_iomap(obj, eb, page);
if (!vaddr)
vaddr = reloc_kmap(obj, cache, page);
}
@@ -1287,7 +1247,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
struct drm_i915_gem_object *obj = vma->obj;
int err;
- i915_vma_lock(vma);
+ assert_vma_held(vma);
if (obj->cache_dirty & ~obj->cache_coherent)
i915_gem_clflush_object(obj, 0);
@@ -1297,25 +1257,31 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
-
return err;
}
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
struct intel_engine_cs *engine,
+ struct i915_vma *vma,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_gt_buffer_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ }
+ eb->reloc_pool = NULL;
+
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ goto err_pool;
cmd = i915_gem_object_pin_map(pool->obj,
cache->has_llc ?
@@ -1323,35 +1289,42 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
I915_MAP_FORCE_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_pool;
+ goto err_pool;
}
- batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
+ batch = i915_vma_instance(pool->obj, vma->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
}
- err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+ err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
if (err)
goto err_unmap;
if (engine == eb->context->engine) {
rq = i915_request_create(eb->context);
} else {
- struct intel_context *ce;
+ struct intel_context *ce = eb->reloc_context;
- ce = intel_context_create(engine);
- if (IS_ERR(ce)) {
- err = PTR_ERR(ce);
- goto err_unpin;
+ if (!ce) {
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err_unpin;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(eb->context->vm);
+ eb->reloc_context = ce;
}
- i915_vm_put(ce->vm);
- ce->vm = i915_vm_get(eb->context->vm);
+ err = intel_context_pin_ww(ce, &eb->ww);
+ if (err)
+ goto err_unpin;
- rq = intel_context_create_request(ce);
- intel_context_put(ce);
+ rq = i915_request_create(ce);
+ intel_context_unpin(ce);
}
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
@@ -1362,11 +1335,20 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- i915_vma_lock(batch);
+ err = reloc_move_to_gpu(rq, vma);
+ if (err)
+ goto err_request;
+
+ err = eb->engine->emit_bb_start(rq,
+ batch->node.start, PAGE_SIZE,
+ cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+ if (err)
+ goto skip_request;
+
+ assert_vma_held(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
if (err)
goto skip_request;
@@ -1376,10 +1358,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq = rq;
cache->rq_cmd = cmd;
cache->rq_size = 0;
- cache->rq_vma = batch;
+ cache->pool = pool;
/* Return with batch mapping (cmd) still pinned */
- goto out_pool;
+ return 0;
skip_request:
i915_request_set_error_once(rq, err);
@@ -1389,8 +1371,8 @@ err_unpin:
i915_vma_unpin(batch);
err_unmap:
i915_gem_object_unpin_map(pool->obj);
-out_pool:
- intel_gt_buffer_pool_put(pool);
+err_pool:
+ eb->reloc_pool = pool;
return err;
}
@@ -1405,9 +1387,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
- int err;
+
+ if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+ reloc_gpu_flush(eb, cache);
if (unlikely(!cache->rq)) {
+ int err;
struct intel_engine_cs *engine = eb->engine;
if (!reloc_can_use_engine(engine)) {
@@ -1416,31 +1401,11 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
return ERR_PTR(-ENODEV);
}
- err = __reloc_gpu_alloc(eb, engine, len);
+ err = __reloc_gpu_alloc(eb, engine, vma, len);
if (unlikely(err))
return ERR_PTR(err);
}
- if (vma != cache->target) {
- err = reloc_move_to_gpu(cache->rq, vma);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
- return ERR_PTR(err);
- }
-
- cache->target = vma;
- }
-
- if (unlikely(cache->rq_size + len >
- PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
- err = reloc_gpu_chain(cache);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
- return ERR_PTR(err);
- }
- }
-
- GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
cmd = cache->rq_cmd + cache->rq_size;
cache->rq_size += len;
@@ -1490,7 +1455,9 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
len = 3;
batch = reloc_gpu(eb, vma, len);
- if (IS_ERR(batch))
+ if (batch == ERR_PTR(-EDEADLK))
+ return (s64)-EDEADLK;
+ else if (IS_ERR(batch))
return false;
addr = gen8_canonical_addr(vma->node.start + offset);
@@ -1543,7 +1510,7 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
return true;
}
-static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+static int reloc_entry_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
u64 offset,
u64 target_addr)
@@ -1565,14 +1532,17 @@ relocate_entry(struct i915_vma *vma,
{
u64 target_addr = relocation_target(reloc, target);
u64 offset = reloc->offset;
+ int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
- if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ if (reloc_gpu < 0)
+ return reloc_gpu;
+
+ if (!reloc_gpu) {
bool wide = eb->reloc_cache.use_64bit_reloc;
void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj,
- &eb->reloc_cache,
+ vaddr = reloc_vaddr(vma->obj, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1723,7 +1693,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* we would try to acquire the struct mutex again. Obviously
* this is bad and so lockdep complains vehemently.
*/
- copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
+ pagefault_disable();
+ copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+ pagefault_enable();
if (unlikely(copied)) {
remain = -EFAULT;
goto out;
@@ -1767,74 +1739,400 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
urelocs += ARRAY_SIZE(stack);
} while (remain);
out:
- reloc_cache_reset(&eb->reloc_cache);
+ reloc_cache_reset(&eb->reloc_cache, eb);
return remain;
}
-static int eb_relocate(struct i915_execbuffer *eb)
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
{
+ const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+ struct drm_i915_gem_relocation_entry *relocs =
+ u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ unsigned int i;
int err;
- err = eb_lookup_vmas(eb);
- if (err)
- return err;
+ for (i = 0; i < entry->relocation_count; i++) {
+ u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
+
+ if ((s64)offset < 0) {
+ err = (int)offset;
+ goto err;
+ }
+ }
+ err = 0;
+err:
+ reloc_cache_reset(&eb->reloc_cache, eb);
+ return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+ const char __user *addr, *end;
+ unsigned long size;
+ char __maybe_unused c;
+
+ size = entry->relocation_count;
+ if (size == 0)
+ return 0;
- if (!list_empty(&eb->unbound)) {
- err = eb_reserve(eb);
+ if (size > N_RELOC(ULONG_MAX))
+ return -EINVAL;
+
+ addr = u64_to_user_ptr(entry->relocs_ptr);
+ size *= sizeof(struct drm_i915_gem_relocation_entry);
+ if (!access_ok(addr, size))
+ return -EFAULT;
+
+ end = addr + size;
+ for (; addr < end; addr += PAGE_SIZE) {
+ int err = __get_user(c, addr);
if (err)
return err;
}
+ return __get_user(c, end - 1);
+}
- /* The objects are in their final locations, apply the relocations. */
- if (eb->args->flags & __EXEC_HAS_RELOC) {
- struct eb_vma *ev;
- int flush;
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+ struct drm_i915_gem_relocation_entry *relocs;
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ int err;
- list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ for (i = 0; i < count; i++) {
+ const unsigned int nreloc = eb->exec[i].relocation_count;
+ struct drm_i915_gem_relocation_entry __user *urelocs;
+ unsigned long size;
+ unsigned long copied;
+
+ if (nreloc == 0)
+ continue;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ goto err;
+
+ urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+ size = nreloc * sizeof(*relocs);
+
+ relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+ if (!relocs) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ /* copy_from_user is limited to < 4GiB */
+ copied = 0;
+ do {
+ unsigned int len =
+ min_t(u64, BIT_ULL(31), size - copied);
+
+ if (__copy_from_user((char *)relocs + copied,
+ (char __user *)urelocs + copied,
+ len))
+ goto end;
+
+ copied += len;
+ } while (copied < size);
+
+ /*
+ * As we do not update the known relocation offsets after
+ * relocating (due to the complexities in lock handling),
+ * we need to mark them as invalid now so that we force the
+ * relocation processing next time. Just in case the target
+ * object is evicted and then rebound into its old
+ * presumed_offset before the next execbuffer - if that
+ * happened we would make the mistake of assuming that the
+ * relocations were valid.
+ */
+ if (!user_access_begin(urelocs, size))
+ goto end;
+
+ for (copied = 0; copied < nreloc; copied++)
+ unsafe_put_user(-1,
+ &urelocs[copied].presumed_offset,
+ end_user);
+ user_access_end();
+
+ eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+ }
+
+ return 0;
+
+end_user:
+ user_access_end();
+end:
+ kvfree(relocs);
+ err = -EFAULT;
+err:
+ while (i--) {
+ relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+ if (eb->exec[i].relocation_count)
+ kvfree(relocs);
+ }
+ return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ int err;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+ struct i915_request *rq)
+{
+ bool have_copy = false;
+ struct eb_vma *ev;
+ int err = 0;
+
+repeat:
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* We may process another execbuffer during the unlock... */
+ eb_release_vmas(eb, false);
+ i915_gem_ww_ctx_fini(&eb->ww);
+
+ if (rq) {
+ /* nonblocking is always false */
+ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ i915_request_put(rq);
+ rq = NULL;
+
+ err = -EINTR;
+ goto err_relock;
+ }
+
+ i915_request_put(rq);
+ rq = NULL;
+ }
+
+ /*
+ * We take 3 passes through the slowpatch.
+ *
+ * 1 - we try to just prefault all the user relocation entries and
+ * then attempt to reuse the atomic pagefault disabled fast path again.
+ *
+ * 2 - we copy the user entries to a local buffer here outside of the
+ * local and allow ourselves to wait upon any rendering before
+ * relocations
+ *
+ * 3 - we already have a local copy of the relocation entries, but
+ * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+ */
+ if (!err) {
+ err = eb_prefault_relocations(eb);
+ } else if (!have_copy) {
+ err = eb_copy_relocations(eb);
+ have_copy = err == 0;
+ } else {
+ cond_resched();
+ err = 0;
+ }
+
+ if (!err)
+ flush_workqueue(eb->i915->mm.userptr_wq);
+
+err_relock:
+ i915_gem_ww_ctx_init(&eb->ww, true);
+ if (err)
+ goto out;
+
+ /* reacquire the objects */
+repeat_validate:
+ rq = eb_pin_engine(eb, false);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ rq = NULL;
+ goto err;
+ }
+
+ /* We didn't throttle, should be NULL */
+ GEM_WARN_ON(rq);
+
+ err = eb_validate_vmas(eb);
+ if (err)
+ goto err;
+
+ GEM_BUG_ON(!eb->batch);
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ if (!have_copy) {
+ pagefault_disable();
err = eb_relocate_vma(eb, ev);
+ pagefault_enable();
+ if (err)
+ break;
+ } else {
+ err = eb_relocate_vma_slow(eb, ev);
if (err)
break;
}
+ }
+
+ if (err == -EDEADLK)
+ goto err;
+
+ if (err && !have_copy)
+ goto repeat;
+
+ if (err)
+ goto err;
+
+ /* as last step, parse the command buffer */
+ err = eb_parse(eb);
+ if (err)
+ goto err;
+
+ /*
+ * Leave the user relocations as are, this is the painfully slow path,
+ * and we want to avoid the complication of dropping the lock whilst
+ * having buffers reserved in the aperture and so causing spurious
+ * ENOSPC for random operations.
+ */
- flush = reloc_gpu_flush(&eb->reloc_cache);
+err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
- err = flush;
+ goto repeat_validate;
}
+ if (err == -EAGAIN)
+ goto repeat;
+
+out:
+ if (have_copy) {
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ const struct drm_i915_gem_exec_object2 *entry =
+ &eb->exec[i];
+ struct drm_i915_gem_relocation_entry *relocs;
+
+ if (!entry->relocation_count)
+ continue;
+
+ relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ kvfree(relocs);
+ }
+ }
+
+ if (rq)
+ i915_request_put(rq);
+
return err;
}
-static int eb_move_to_gpu(struct i915_execbuffer *eb)
+static int eb_relocate_parse(struct i915_execbuffer *eb)
{
- const unsigned int count = eb->buffer_count;
- struct ww_acquire_ctx acquire;
- unsigned int i;
- int err = 0;
+ int err;
+ struct i915_request *rq = NULL;
+ bool throttle = true;
- ww_acquire_init(&acquire, &reservation_ww_class);
+retry:
+ rq = eb_pin_engine(eb, throttle);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ rq = NULL;
+ if (err != -EDEADLK)
+ return err;
- for (i = 0; i < count; i++) {
- struct eb_vma *ev = &eb->vma[i];
- struct i915_vma *vma = ev->vma;
+ goto err;
+ }
+
+ if (rq) {
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+
+ /* Need to drop all locks now for throttling, take slowpath */
+ err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+ if (err == -ETIME) {
+ if (nonblock) {
+ err = -EWOULDBLOCK;
+ i915_request_put(rq);
+ goto err;
+ }
+ goto slow;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
- err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
- if (err == -EDEADLK) {
- GEM_BUG_ON(i == 0);
- do {
- int j = i - 1;
+ /* only throttle once, even if we didn't need to throttle */
+ throttle = false;
- ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
+ err = eb_validate_vmas(eb);
+ if (err == -EAGAIN)
+ goto slow;
+ else if (err)
+ goto err;
- swap(eb->vma[i], eb->vma[j]);
- } while (--i);
+ /* The objects are in their final locations, apply the relocations. */
+ if (eb->args->flags & __EXEC_HAS_RELOC) {
+ struct eb_vma *ev;
- err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
- &acquire);
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ err = eb_relocate_vma(eb, ev);
+ if (err)
+ break;
}
- if (err)
- break;
+
+ if (err == -EDEADLK)
+ goto err;
+ else if (err)
+ goto slow;
+ }
+
+ if (!err)
+ err = eb_parse(eb);
+
+err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
+ if (!err)
+ goto retry;
}
- ww_acquire_done(&acquire);
+
+ return err;
+
+slow:
+ err = eb_relocate_parse_slow(eb, rq);
+ if (err)
+ /*
+ * If the user expects the execobject.offset and
+ * reloc.presumed_offset to be an exact match,
+ * as for using NO_RELOC, then we cannot update
+ * the execobject.offset until we have completed
+ * relocation.
+ */
+ eb->args->flags &= ~__EXEC_HAS_RELOC;
+
+ return err;
+}
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i = count;
+ int err = 0;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1879,13 +2177,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
if (err == 0)
err = i915_vma_move_to_active(vma, eb->request, flags);
-
- i915_vma_unlock(vma);
- eb_unreserve_vma(ev);
}
- ww_acquire_fini(&acquire);
-
- eb_vma_array_put(fetch_and_zero(&eb->array));
if (unlikely(err))
goto err_skip;
@@ -1950,7 +2242,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
}
static struct i915_vma *
-shadow_batch_pin(struct drm_i915_gem_object *obj,
+shadow_batch_pin(struct i915_execbuffer *eb,
+ struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
unsigned int flags)
{
@@ -1961,7 +2254,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return vma;
- err = i915_vma_pin(vma, 0, 0, flags);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
if (err)
return ERR_PTR(err);
@@ -2013,7 +2306,7 @@ __parser_mark_active(struct i915_vma *vma,
{
struct intel_gt_buffer_pool_node *node = vma->private;
- return i915_active_ref(&node->active, tl, fence);
+ return i915_active_ref(&node->active, tl->fence_context, fence);
}
static int
@@ -2077,36 +2370,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
if (err)
goto err_commit;
- err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
- if (err)
- goto err_commit;
-
err = dma_resv_reserve_shared(pw->batch->resv, 1);
if (err)
- goto err_commit_unlock;
+ goto err_commit;
/* Wait for all writes (and relocs) into the batch to complete */
err = i915_sw_fence_await_reservation(&pw->base.chain,
pw->batch->resv, NULL, false,
0, I915_FENCE_GFP);
if (err < 0)
- goto err_commit_unlock;
+ goto err_commit;
/* Keep the batch alive and unwritten as we parse */
dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
- dma_resv_unlock(pw->batch->resv);
-
/* Force execution to wait for completion of the parser */
- dma_resv_lock(shadow->resv, NULL);
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
- dma_resv_unlock(shadow->resv);
dma_fence_work_commit_imm(&pw->base);
return 0;
-err_commit_unlock:
- dma_resv_unlock(pw->batch->resv);
err_commit:
i915_sw_fence_set_error_once(&pw->base.chain, err);
dma_fence_work_commit_imm(&pw->base);
@@ -2121,16 +2404,33 @@ err_free:
return err;
}
+static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+ /*
+ * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+ * batch" bit. Hence we need to pin secure batches into the global gtt.
+ * hsw should have this fixed, but bdw mucks it up again.
+ */
+ if (eb->batch_flags & I915_DISPATCH_SECURE)
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+
+ return NULL;
+}
+
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
- struct intel_gt_buffer_pool_node *pool;
- struct i915_vma *shadow, *trampoline;
+ struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
+ struct i915_vma *shadow, *trampoline, *batch;
unsigned int len;
int err;
- if (!eb_use_cmdparser(eb))
- return 0;
+ if (!eb_use_cmdparser(eb)) {
+ batch = eb_dispatch_secure(eb, eb->batch->vma);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ goto secure_batch;
+ }
len = eb->batch_len;
if (!CMDPARSER_USES_GGTT(eb->i915)) {
@@ -2147,11 +2447,18 @@ static int eb_parse(struct i915_execbuffer *eb)
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ eb->batch_pool = pool;
+ }
- shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ goto err;
+
+ shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
if (IS_ERR(shadow)) {
err = PTR_ERR(shadow);
goto err;
@@ -2163,7 +2470,7 @@ static int eb_parse(struct i915_execbuffer *eb)
if (CMDPARSER_USES_GGTT(eb->i915)) {
trampoline = shadow;
- shadow = shadow_batch_pin(pool->obj,
+ shadow = shadow_batch_pin(eb, pool->obj,
&eb->engine->gt->ggtt->vm,
PIN_GLOBAL);
if (IS_ERR(shadow)) {
@@ -2176,42 +2483,43 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_flags |= I915_DISPATCH_SECURE;
}
+ batch = eb_dispatch_secure(eb, shadow);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err_trampoline;
+ }
+
err = eb_parse_pipeline(eb, shadow, trampoline);
if (err)
- goto err_trampoline;
+ goto err_unpin_batch;
- eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
- eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
- eb->vma[eb->buffer_count].vma = NULL;
+ eb->batch->vma = i915_vma_get(shadow);
+ eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
+secure_batch:
+ if (batch) {
+ eb->batch = &eb->vma[eb->buffer_count++];
+ eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+ eb->batch->vma = i915_vma_get(batch);
+ }
return 0;
+err_unpin_batch:
+ if (batch)
+ i915_vma_unpin(batch);
err_trampoline:
if (trampoline)
i915_vma_unpin(trampoline);
err_shadow:
i915_vma_unpin(shadow);
err:
- intel_gt_buffer_pool_put(pool);
return err;
}
-static void
-add_to_client(struct i915_request *rq, struct drm_file *file)
-{
- struct drm_i915_file_private *file_priv = file->driver_priv;
-
- rq->file_priv = file_priv;
-
- spin_lock(&file_priv->mm.lock);
- list_add_tail(&rq->client_link, &file_priv->mm.request_list);
- spin_unlock(&file_priv->mm.lock);
-}
-
static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
{
int err;
@@ -2293,7 +2601,7 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX] = VECS0
};
-static struct i915_request *eb_throttle(struct intel_context *ce)
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
{
struct intel_ring *ring = ce->ring;
struct intel_timeline *tl = ce->timeline;
@@ -2327,31 +2635,26 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
return i915_request_get(rq);
}
-static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
{
+ struct intel_context *ce = eb->context;
struct intel_timeline *tl;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
int err;
- /*
- * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
- * EIO if the GPU is already wedged.
- */
- err = intel_gt_terminally_wedged(ce->engine->gt);
- if (err)
- return err;
+ GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
if (unlikely(intel_context_is_banned(ce)))
- return -EIO;
+ return ERR_PTR(-EIO);
/*
* Pinning the contexts may generate requests in order to acquire
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- err = intel_context_pin(ce);
+ err = intel_context_pin_ww(ce, &eb->ww);
if (err)
- return err;
+ return ERR_PTR(err);
/*
* Take a local wakeref for preparing to dispatch the execbuf as
@@ -2363,45 +2666,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
*/
tl = intel_context_timeline_lock(ce);
if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto err_unpin;
+ intel_context_unpin(ce);
+ return ERR_CAST(tl);
}
intel_context_enter(ce);
- rq = eb_throttle(ce);
-
+ if (throttle)
+ rq = eb_throttle(eb, ce);
intel_context_timeline_unlock(tl);
- if (rq) {
- bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
- long timeout;
-
- timeout = MAX_SCHEDULE_TIMEOUT;
- if (nonblock)
- timeout = 0;
-
- timeout = i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- timeout);
- i915_request_put(rq);
-
- if (timeout < 0) {
- err = nonblock ? -EWOULDBLOCK : timeout;
- goto err_exit;
- }
- }
-
- eb->engine = ce->engine;
- eb->context = ce;
- return 0;
-
-err_exit:
- mutex_lock(&tl->mutex);
- intel_context_exit(ce);
- intel_context_timeline_unlock(tl);
-err_unpin:
- intel_context_unpin(ce);
- return err;
+ eb->args->flags |= __EXEC_ENGINE_PINNED;
+ return rq;
}
static void eb_unpin_engine(struct i915_execbuffer *eb)
@@ -2409,6 +2684,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
struct intel_context *ce = eb->context;
struct intel_timeline *tl = ce->timeline;
+ if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+ return;
+
+ eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
mutex_lock(&tl->mutex);
intel_context_exit(ce);
mutex_unlock(&tl->mutex);
@@ -2417,11 +2697,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
}
static unsigned int
-eb_select_legacy_ring(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_select_legacy_ring(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
if (user_ring_id != I915_EXEC_BSD &&
@@ -2436,7 +2715,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
- bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+ bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2461,30 +2740,61 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
}
static int
-eb_pin_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce;
unsigned int idx;
int err;
if (i915_gem_context_user_engines(eb->gem_context))
- idx = args->flags & I915_EXEC_RING_MASK;
+ idx = eb->args->flags & I915_EXEC_RING_MASK;
else
- idx = eb_select_legacy_ring(eb, file, args);
+ idx = eb_select_legacy_ring(eb);
ce = i915_gem_context_get_engine(eb->gem_context, idx);
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = __eb_pin_engine(eb, ce);
- intel_context_put(ce);
+ intel_gt_pm_get(ce->engine->gt);
+
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ err = intel_context_alloc_state(ce);
+ if (err)
+ goto err;
+ }
+
+ /*
+ * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * EIO if the GPU is already wedged.
+ */
+ err = intel_gt_terminally_wedged(ce->engine->gt);
+ if (err)
+ goto err;
+ eb->context = ce;
+ eb->engine = ce->engine;
+
+ /*
+ * Make sure engine pool stays alive even if we call intel_context_put
+ * during ww handling. The pool is destroyed when the last pm reference
+ * is dropped, which would break our -EDEADLK handling.
+ */
+ return err;
+
+err:
+ intel_gt_pm_put(ce->engine->gt);
+ intel_context_put(ce);
return err;
}
static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+ intel_gt_pm_put(eb->engine->gt);
+ intel_context_put(eb->context);
+}
+
+static void
__free_fence_array(struct eb_fence *fences, unsigned int n)
{
while (n--) {
@@ -2573,6 +2883,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+ dma_fence_put(fence);
drm_syncobj_put(syncobj);
return err;
}
@@ -2860,6 +3171,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
+ eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
+ eb.vma[0].vma = NULL;
+ eb.reloc_pool = eb.batch_pool = NULL;
+ eb.reloc_context = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2928,11 +3243,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
- err = eb_pin_engine(&eb, file, args);
+ err = eb_select_engine(&eb);
if (unlikely(err))
goto err_context;
- err = eb_relocate(&eb);
+ err = eb_lookup_vmas(&eb);
+ if (err) {
+ eb_release_vmas(&eb, true);
+ goto err_engine;
+ }
+
+ i915_gem_ww_ctx_init(&eb.ww, true);
+
+ err = eb_relocate_parse(&eb);
if (err) {
/*
* If the user expects the execobject.offset and
@@ -2945,54 +3268,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
- if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
- drm_dbg(&i915->drm,
- "Attempting to use self-modifying batch buffer\n");
- err = -EINVAL;
- goto err_vma;
- }
-
- if (range_overflows_t(u64,
- eb.batch_start_offset, eb.batch_len,
- eb.batch->vma->size)) {
- drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
- err = -EINVAL;
- goto err_vma;
- }
-
- if (eb.batch_len == 0)
- eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
-
- err = eb_parse(&eb);
- if (err)
- goto err_vma;
+ ww_acquire_done(&eb.ww.ctx);
- /*
- * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
- * batch" bit. Hence we need to pin secure batches into the global gtt.
- * hsw should have this fixed, but bdw mucks it up again. */
batch = eb.batch->vma;
- if (eb.batch_flags & I915_DISPATCH_SECURE) {
- struct i915_vma *vma;
-
- /*
- * So on first glance it looks freaky that we pin the batch here
- * outside of the reservation loop. But:
- * - The batch is already pinned into the relevant ppgtt, so we
- * already have the backing storage fully allocated.
- * - No other BO uses the global gtt (well contexts, but meh),
- * so we don't really have issues with multiple objects not
- * fitting due to fragmentation.
- * So this is actually safe.
- */
- vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_parse;
- }
-
- batch = vma;
- }
/* All GPU relocation batches must be submitted prior to the user rq */
GEM_BUG_ON(eb.reloc_cache.rq);
@@ -3001,7 +3279,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
err = PTR_ERR(eb.request);
- goto err_batch_unpin;
+ goto err_vma;
}
if (in_fence) {
@@ -3038,13 +3316,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
* to explicitly hold another reference here.
*/
eb.request->batch = batch;
- if (batch->private)
- intel_gt_buffer_pool_mark_active(batch->private, eb.request);
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
err_request:
- add_to_client(eb.request, file);
i915_request_get(eb.request);
eb_request_add(&eb);
@@ -3063,16 +3340,21 @@ err_request:
}
i915_request_put(eb.request);
-err_batch_unpin:
- if (eb.batch_flags & I915_DISPATCH_SECURE)
- i915_vma_unpin(batch);
-err_parse:
- if (batch->private)
- intel_gt_buffer_pool_put(batch->private);
err_vma:
+ eb_release_vmas(&eb, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
- eb_unpin_engine(&eb);
+ WARN_ON(err == -EDEADLK);
+ i915_gem_ww_ctx_fini(&eb.ww);
+
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_put(eb.batch_pool);
+ if (eb.reloc_pool)
+ intel_gt_buffer_pool_put(eb.reloc_pool);
+ if (eb.reloc_context)
+ intel_context_put(eb.reloc_context);
+err_engine:
+ eb_put_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);
err_destroy:
@@ -3089,7 +3371,7 @@ err_ext:
static size_t eb_element_size(void)
{
- return sizeof(struct drm_i915_gem_exec_object2);
+ return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
}
static bool check_buffer_count(size_t count)
@@ -3145,7 +3427,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count, eb_element_size(),
+
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
drm_dbg(&i915->drm,
@@ -3222,7 +3506,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
if (err)
return err;
- exec2_list = kvmalloc_array(count, eb_element_size(),
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
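
The execbuffer rework above replaces the old per-call locking with one ww acquire context that every lock and pin in the ioctl flows through. Condensed, the new i915_gem_do_execbuffer() ordering looks roughly like the sketch below; it reuses the helper names from this patch but elides fences, error details and request construction, so treat it as an illustration rather than the exact driver code.

static int do_execbuffer_sketch(struct i915_execbuffer *eb)
{
	int err;

	err = eb_select_engine(eb);	/* holds gt pm + context references */
	if (err)
		return err;

	err = eb_lookup_vmas(eb);	/* no object locks taken yet */
	if (err)
		goto out_engine;

	i915_gem_ww_ctx_init(&eb->ww, true);

	/* All locking/pinning below goes through &eb->ww, retrying on -EDEADLK */
	err = eb_relocate_parse(eb);
	if (err)
		goto out_vma;

	ww_acquire_done(&eb->ww.ctx);	/* no further locks after this point */

	/* ... create the request, move vmas to active, submit ... */

out_vma:
	eb_release_vmas(eb, true);
	i915_gem_ww_ctx_fini(&eb->ww);
out_engine:
	eb_put_engine(eb);
	return err;
}
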
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 753f82d87a31..3d69e51f3e4d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct intel_runtime_pm *rpm = &i915->runtime_pm;
struct i915_ggtt *ggtt = &i915->ggtt;
bool write = area->vm_flags & VM_WRITE;
+ struct i915_gem_ww_ctx ww;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
- /* Sanity check that we allow writing into this object */
- if (i915_gem_object_is_readonly(obj) && write)
- return VM_FAULT_SIGBUS;
-
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
trace_i915_gem_object_fault(obj, page_offset, true, write);
- ret = i915_gem_object_pin_pages(obj);
+ wakeref = intel_runtime_pm_get(rpm);
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ ret = i915_gem_object_lock(obj, &ww);
if (ret)
- goto err;
+ goto err_rpm;
- wakeref = intel_runtime_pm_get(rpm);
+ /* Sanity check that we allow writing into this object */
+ if (i915_gem_object_is_readonly(obj) && write) {
+ ret = -EFAULT;
+ goto err_rpm;
+ }
- ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_rpm;
+ ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+ if (ret)
+ goto err_pages;
+
/* Now pin it into the GTT as needed */
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
- if (IS_ERR(vma)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
@@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
* all hope that the hardware is able to track future writes.
*/
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
- if (IS_ERR(vma)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
flags = PIN_MAPPABLE;
view.type = I915_GGTT_VIEW_PARTIAL;
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
}
/* The entire mappable GGTT is pinned? Unexpected! */
@@ -389,10 +398,16 @@ err_unpin:
__i915_vma_unpin(vma);
err_reset:
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+err_pages:
+ i915_gem_object_unpin_pages(obj);
err_rpm:
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
intel_runtime_pm_put(rpm, wakeref);
- i915_gem_object_unpin_pages(obj);
-err:
return i915_error_to_vmf_fault(ret);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 9cf4ad78ece6..d46db8d8f38e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
-static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ bool intr)
{
- dma_resv_lock(obj->base.resv, NULL);
+ int ret;
+
+ if (intr)
+ ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+ else
+ ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+
+ if (!ret && ww)
+ list_add_tail(&obj->obj_link, &ww->obj_list);
+ if (ret == -EALREADY)
+ ret = 0;
+
+ if (ret == -EDEADLK)
+ ww->contended = obj;
+
+ return ret;
}
-static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
{
- return dma_resv_trylock(obj->base.resv);
+ return __i915_gem_object_lock(obj, ww, ww && ww->intr);
}
-static inline int
-i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
{
- return dma_resv_lock_interruptible(obj->base.resv, NULL);
+ WARN_ON(ww && !ww->intr);
+ return __i915_gem_object_lock(obj, ww, true);
+}
+
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+ return dma_resv_trylock(obj->base.resv);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
@@ -412,7 +436,6 @@ static inline void
i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
{
i915_gem_object_unpin_pages(obj);
- i915_gem_object_unlock(obj);
}
static inline struct intel_engine_cs *
@@ -435,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
int __must_check
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
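
Callers that pass a ww context to the new i915_gem_object_lock() are expected to wrap the whole acquisition in the init/retry/backoff/fini loop seen throughout this patch (the blt paths, the GTT fault handler, the selftests). A minimal sketch of that caller pattern, with unpinning and the actual work elided:

static int lock_and_pin_sketch(struct drm_i915_gem_object *obj,
			       struct i915_vma *vma)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);	/* true = interruptible */
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);

	/* ... use the object while the lock is held; i915_vma_unpin() elided ... */

	if (err == -EDEADLK) {
		/* Drop every lock taken under &ww, then try again */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);	/* also unlocks anything still held */
	return err;
}
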
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index bfdb32d46877..d93eb36160c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -14,6 +14,7 @@
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
struct i915_vma *vma,
+ struct i915_gem_ww_ctx *ww,
u32 value)
{
struct drm_i915_private *i915 = ce->vm->i915;
@@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
goto out_pm;
}
+ err = i915_gem_object_lock(pool->obj, ww);
+ if (err)
+ goto out_put;
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
+ }
+
+ err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_put;
+ goto out_unpin;
}
rem = vma->size;
@@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
intel_gt_chipset_flush(ce->vm->gt);
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
batch->private = pool;
return batch;
+out_unpin:
+ i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, 0);
- i915_vma_unlock(vma);
if (unlikely(err))
return err;
@@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
struct intel_context *ce,
u32 value)
{
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
struct i915_vma *vma;
@@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (unlikely(err))
- return err;
+ i915_gem_ww_ctx_init(&ww, true);
+ intel_engine_pm_get(ce->engine);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ goto out;
- batch = intel_emit_vma_fill_blt(ce, vma, value);
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto out_ctx;
+
+ batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto out_unpin;
+ goto out_vma;
}
- rq = intel_context_create_request(ce);
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (unlikely(err))
goto out_request;
- i915_vma_lock(vma);
err = move_obj_to_gpu(vma->obj, rq, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
if (unlikely(err))
goto out_request;
@@ -193,8 +208,18 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(ce, batch);
-out_unpin:
+out_vma:
i915_vma_unpin(vma);
+out_ctx:
+ intel_context_unpin(ce);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
return err;
}
@@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
}
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst)
{
@@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
goto out_pm;
}
+ err = i915_gem_object_lock(pool->obj, ww);
+ if (err)
+ goto out_put;
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
+ }
+
+ err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_put;
+ goto out_unpin;
}
rem = src->size;
@@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
i915_gem_object_unpin_map(pool->obj);
intel_gt_chipset_flush(ce->vm->gt);
-
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
batch->private = pool;
return batch;
+out_unpin:
+ i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct intel_context *ce)
{
- struct drm_gem_object *objs[] = { &src->base, &dst->base };
struct i915_address_space *vm = ce->vm;
struct i915_vma *vma[2], *batch;
- struct ww_acquire_ctx acquire;
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err, i;
@@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (IS_ERR(vma[0]))
return PTR_ERR(vma[0]);
- err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
- if (unlikely(err))
- return err;
-
vma[1] = i915_vma_instance(dst, vm, NULL);
if (IS_ERR(vma[1]))
- goto out_unpin_src;
+ return PTR_ERR(vma[1]);
- err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
+ i915_gem_ww_ctx_init(&ww, true);
+ intel_engine_pm_get(ce->engine);
+retry:
+ err = i915_gem_object_lock(src, &ww);
+ if (!err)
+ err = i915_gem_object_lock(dst, &ww);
+ if (!err)
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
+ if (err)
+ goto out_ctx;
+
+ err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
if (unlikely(err))
goto out_unpin_src;
- batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
+ batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_unpin_dst;
}
- rq = intel_context_create_request(ce);
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (unlikely(err))
goto out_request;
- err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
- if (unlikely(err))
- goto out_request;
-
for (i = 0; i < ARRAY_SIZE(vma); i++) {
err = move_obj_to_gpu(vma[i]->obj, rq, i);
if (unlikely(err))
- goto out_unlock;
+ goto out_request;
}
for (i = 0; i < ARRAY_SIZE(vma); i++) {
@@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
err = i915_vma_move_to_active(vma[i], rq, flags);
if (unlikely(err))
- goto out_unlock;
+ goto out_request;
}
if (rq->engine->emit_init_breadcrumb) {
err = rq->engine->emit_init_breadcrumb(rq);
if (unlikely(err))
- goto out_unlock;
+ goto out_request;
}
err = rq->engine->emit_bb_start(rq,
batch->node.start, batch->node.size,
0);
-out_unlock:
- drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+
out_request:
if (unlikely(err))
i915_request_set_error_once(rq, err);
@@ -400,6 +436,16 @@ out_unpin_dst:
i915_vma_unpin(vma[1]);
out_unpin_src:
i915_vma_unpin(vma[0]);
+out_ctx:
+ intel_context_unpin(ce);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
return err;
}
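
For the copy path, both source and destination now go through the same acquire context instead of drm_gem_lock_reservations(), so a lock-order inversion against another thread simply surfaces as -EDEADLK and is resolved by the caller's backoff loop. The relevant shape, as a sketch (pinning, request setup and -EDEADLK handling are left to the surrounding retry loop shown above):

static int lock_pair_sketch(struct drm_i915_gem_object *src,
			    struct drm_i915_gem_object *dst,
			    struct i915_gem_ww_ctx *ww)
{
	int err;

	err = i915_gem_object_lock(src, ww);
	if (!err)
		err = i915_gem_object_lock(dst, ww);	/* -EALREADY folds to 0 */
	return err;	/* -EDEADLK propagates to the caller's retry loop */
}
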
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 8bcd336a90dc..2409fdcccf0e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -13,12 +13,15 @@
#include "i915_vma.h"
struct drm_i915_gem_object;
+struct i915_gem_ww_ctx;
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
struct i915_vma *vma,
+ struct i915_gem_ww_ctx *ww,
u32 value);
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5335f799b548..b5c15557cc87 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -123,6 +123,15 @@ struct drm_i915_gem_object {
struct list_head lut_list;
spinlock_t lut_lock; /* guards lut_list */
+ /**
+ * @obj_link: Link into @i915_gem_ww_ctx.obj_list
+ *
+ * When we lock this object through i915_gem_object_lock() with a
+ * context, we add it to the list to ensure we can unlock everything
+ * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
+ */
+ struct list_head obj_link;
+
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
union {
@@ -282,6 +291,7 @@ struct drm_i915_gem_object {
} userptr;
unsigned long scratch;
+ u64 encode;
void *gvt_info;
};
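
The new @obj_link member is what lets one acquire context clean up after itself: every object locked with a ww context is appended to ww->obj_list (see the i915_gem_object_lock() change earlier in this patch), and backoff/fini walk that list to drop the locks. Those helpers are not part of this diff, so the following is only a rough sketch of the documented behaviour, not the driver's implementation:

static void ww_unlock_all_sketch(struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next, &ww->obj_list, obj_link) {
		list_del(&obj->obj_link);
		i915_gem_object_unlock(obj);
	}
	/*
	 * i915_gem_ww_ctx_backoff() additionally blocks on ww->contended
	 * (the object that returned -EDEADLK) before the caller retries.
	 */
}
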
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3d215164dd5a..40d3e40500fa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
drm_WARN_ON(&i915->drm,
i915_gem_object_set_to_gtt_domain(obj, false));
i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index 540ef0551789..1929d6cf4150 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -9,6 +9,7 @@
#include <drm/drm_file.h>
#include "i915_drv.h"
+#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
@@ -35,9 +36,10 @@ int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_file_private *file_priv = file->driver_priv;
- unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
- struct i915_request *request, *target = NULL;
+ struct i915_gem_context *ctx;
+ unsigned long idx;
long ret;
/* ABI: return -EIO if already wedged */
@@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
- spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
- if (time_after_eq(request->emitted_jiffies, recent_enough))
- break;
+ rcu_read_lock();
+ xa_for_each(&file_priv->context_xa, idx, ctx) {
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
- if (target && xchg(&target->file_priv, NULL))
- list_del(&target->client_link);
+ if (!kref_get_unless_zero(&ctx->ref))
+ continue;
+ rcu_read_unlock();
- target = request;
- }
- if (target)
- i915_request_get(target);
- spin_unlock(&file_priv->mm.lock);
+ for_each_gem_engine(ce,
+ i915_gem_context_lock_engines(ctx),
+ it) {
+ struct i915_request *rq, *target = NULL;
+
+ if (!ce->timeline)
+ continue;
+
+ mutex_lock(&ce->timeline->mutex);
+ list_for_each_entry_reverse(rq,
+ &ce->timeline->requests,
+ link) {
+ if (i915_request_completed(rq))
+ break;
- if (!target)
- return 0;
+ if (time_after(rq->emitted_jiffies,
+ recent_enough))
+ continue;
- ret = i915_request_wait(target,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- i915_request_put(target);
+ target = i915_request_get(rq);
+ break;
+ }
+ mutex_unlock(&ce->timeline->mutex);
+ if (!target)
+ continue;
+
+ ret = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(target);
+ if (ret < 0)
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ i915_gem_context_put(ctx);
+
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
return ret < 0 ? ret : 0;
}
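
The throttle ioctl no longer consults a per-file request list (removed along with add_to_client() earlier in this patch); instead it scans every engine timeline of every context belonging to the file and waits on one request per timeline. The per-timeline selection above boils down to the following, with locking and the final wait stripped out for clarity:

static struct i915_request *
pick_throttle_target_sketch(struct intel_timeline *tl,
			    unsigned long recent_enough)
{
	struct i915_request *rq;

	list_for_each_entry_reverse(rq, &tl->requests, link) {
		if (i915_request_completed(rq))
			break;			/* everything older has finished */

		if (time_after(rq->emitted_jiffies, recent_enough))
			continue;		/* too recent to throttle on */

		return i915_request_get(rq);	/* newest sufficiently old request */
	}

	return NULL;
}
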
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index ff72ee2fd9cd..ffcaee74a249 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
* whilst executing a fenced command for an untiled object.
*/
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
if (i915_gem_object_is_framebuffer(obj)) {
i915_gem_object_unlock(obj);
return -EBUSY;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 8291ede6902c..5daf4a2be422 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
*/
for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
- unsigned int combination = 0;
+ unsigned int combination = SZ_4K; /* Required for ppGTT */
for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
if (i & BIT(j))
@@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
{
int err;
- i915_gem_object_lock(vma->obj);
+ i915_gem_object_lock(vma->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
i915_gem_object_unlock(vma->obj);
if (err)
@@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
unsigned long n;
int err;
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
- return err;
+ goto err_unlock;
for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
@@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
}
i915_gem_object_finish_access(obj);
+err_unlock:
+ i915_gem_object_unlock(obj);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 299c29e9ad86..4e36d4897ea6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
if (err)
goto err_unpin;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 87d7d8aa080f..7049a6bbc03d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cpu;
int err;
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
if (err)
- return err;
+ goto out;
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
kunmap_atomic(map);
i915_gem_object_finish_access(ctx->obj);
- return 0;
+out:
+ i915_gem_object_unlock(ctx->obj);
+ return err;
}
static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
@@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
u32 *cpu;
int err;
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
if (err)
- return err;
+ goto out;
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
kunmap_atomic(map);
i915_gem_object_finish_access(ctx->obj);
- return 0;
+out:
+ i915_gem_object_unlock(ctx->obj);
+ return err;
}
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
@@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
u32 __iomem *map;
int err = 0;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
u32 __iomem *map;
int err = 0;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
u32 *map;
int err;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
u32 *map;
int err;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cs;
int err;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- i915_gem_object_unlock(ctx->obj);
if (err)
- return err;
+ goto out_unlock;
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_unlock;
+ }
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
- i915_vma_unpin(vma);
- return PTR_ERR(rq);
+ err = PTR_ERR(rq);
+ goto out_unpin;
}
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) {
- i915_request_add(rq);
- i915_vma_unpin(vma);
- return PTR_ERR(cs);
+ err = PTR_ERR(cs);
+ goto out_rq;
}
if (INTEL_GEN(ctx->engine->i915) >= 8) {
@@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
}
intel_ring_advance(rq, cs);
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
- i915_vma_unpin(vma);
+out_rq:
i915_request_add(rq);
+out_unpin:
+ i915_vma_unpin(vma);
+out_unlock:
+ i915_gem_object_unlock(ctx->obj);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7ffc3c751432..99becb86abd3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
unsigned int n, m, need_flush;
int err;
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_write(obj, &need_flush);
if (err)
- return err;
+ goto out;
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
i915_gem_object_finish_access(obj);
obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
obj->write_domain = 0;
- return 0;
+out:
+ i915_gem_object_unlock(obj);
+ return err;
}
static noinline int cpu_check(struct drm_i915_gem_object *obj,
@@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
unsigned int n, m, needs_flush;
int err;
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
- return err;
+ goto out_unlock;
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@@ -527,6 +531,8 @@ out_unmap:
}
i915_gem_object_finish_access(obj);
+out_unlock:
+ i915_gem_object_unlock(obj);
return err;
}
@@ -887,24 +893,15 @@ out_file:
return err;
}
-static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
+static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
{
- struct drm_i915_gem_object *obj;
u32 *cmd;
- int err;
- if (INTEL_GEN(vma->vm->i915) < 8)
- return ERR_PTR(-EINVAL);
+ GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8);
- obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto err;
- }
+ cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
+ if (IS_ERR(cmd))
+ return PTR_ERR(cmd);
*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
@@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
*cmd++ = upper_32_bits(vma->node.start);
*cmd = MI_BATCH_BUFFER_END;
- __i915_gem_object_flush_map(obj, 0, 64);
- i915_gem_object_unpin_map(obj);
+ __i915_gem_object_flush_map(rpcs, 0, 64);
+ i915_gem_object_unpin_map(rpcs);
intel_gt_chipset_flush(vma->vm->gt);
- vma = i915_vma_instance(obj, vma->vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- goto err;
-
- return vma;
-
-err:
- i915_gem_object_put(obj);
- return ERR_PTR(err);
+ return 0;
}
static int
@@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
struct intel_context *ce,
struct i915_request **rq_out)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_request *rq;
+ struct i915_gem_ww_ctx ww;
struct i915_vma *batch;
struct i915_vma *vma;
+ struct drm_i915_gem_object *rpcs;
int err;
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+ if (INTEL_GEN(i915) < 8)
+ return -EINVAL;
+
vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, false);
- i915_gem_object_unlock(obj);
- if (err)
- return err;
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- return err;
+ rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(rpcs))
+ return PTR_ERR(rpcs);
- batch = rpcs_query_batch(vma);
+ batch = i915_vma_instance(rpcs, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto err_vma;
+ goto err_put;
}
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_lock(rpcs, &ww);
+ if (!err)
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err_put;
+
+ err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err_vma;
+
+ err = rpcs_query_batch(rpcs, vma);
+ if (err)
+ goto err_batch;
+
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_batch;
}
- i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
if (err)
goto skip_request;
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
if (err)
goto skip_request;
@@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
if (err)
goto skip_request;
- i915_vma_unpin_and_release(&batch, 0);
- i915_vma_unpin(vma);
-
*rq_out = i915_request_get(rq);
- i915_request_add(rq);
-
- return 0;
-
skip_request:
- i915_request_set_error_once(rq, err);
+ if (err)
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
err_batch:
- i915_vma_unpin_and_release(&batch, 0);
+ i915_vma_unpin(batch);
err_vma:
i915_vma_unpin(vma);
-
+err_put:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ i915_gem_object_put(rpcs);
return err;
}
@@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
i915_request_add(rq);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
@@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
if (!vm)
return -ENODEV;
- page = vm->scratch[0].base.page;
+ page = __px_page(vm->scratch[0]);
if (!page) {
pr_err("No scratch page!\n");
return -EINVAL;
@@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg)
return -ENOMEM;
counter = 0;
- err = context_barrier_task(ctx, 0,
- NULL, NULL, mock_barrier_task, &counter);
+ err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
+ mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg)
}
counter = 0;
- err = context_barrier_task(ctx, ALL_ENGINES,
- skip_unused_engines,
- NULL,
- mock_barrier_task,
- &counter);
+ err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
+ NULL, NULL, mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg)
counter = 0;
context_barrier_inject_fault = BIT(RCS0);
- err = context_barrier_task(ctx, ALL_ENGINES,
- NULL, NULL, mock_barrier_task, &counter);
+ err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
+ mock_barrier_task, &counter);
context_barrier_inject_fault = 0;
if (err == -ENXIO)
err = 0;
@@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg)
goto out;
counter = 0;
- err = context_barrier_task(ctx, ALL_ENGINES,
- skip_unused_engines,
- NULL,
- mock_barrier_task,
- &counter);
+ err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
+ NULL, NULL, mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index a49016f8ee0d..e1d50a5a1477 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -32,46 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ err = i915_gem_object_lock(obj, &eb->ww);
+ if (err)
+ return err;
+
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
if (err)
return err;
/* 8-Byte aligned */
- if (!__reloc_entry_gpu(eb, vma,
- offsets[0] * sizeof(u32),
- 0)) {
- err = -EIO;
- goto unpin_vma;
- }
+ err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
+ if (err <= 0)
+ goto reloc_err;
/* !8-Byte aligned */
- if (!__reloc_entry_gpu(eb, vma,
- offsets[1] * sizeof(u32),
- 1)) {
- err = -EIO;
- goto unpin_vma;
- }
+ err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
+ if (err <= 0)
+ goto reloc_err;
/* Skip to the end of the cmd page */
- i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+ i = PAGE_SIZE / sizeof(u32) - 1;
i -= eb->reloc_cache.rq_size;
memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
MI_NOOP, i);
eb->reloc_cache.rq_size += i;
- /* Force batch chaining */
- if (!__reloc_entry_gpu(eb, vma,
- offsets[2] * sizeof(u32),
- 2)) {
- err = -EIO;
- goto unpin_vma;
- }
+ /* Force next batch */
+ err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
+ if (err <= 0)
+ goto reloc_err;
GEM_BUG_ON(!eb->reloc_cache.rq);
rq = i915_request_get(eb->reloc_cache.rq);
- err = reloc_gpu_flush(&eb->reloc_cache);
- if (err)
- goto put_rq;
+ reloc_gpu_flush(eb, &eb->reloc_cache);
GEM_BUG_ON(eb->reloc_cache.rq);
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
@@ -103,6 +96,11 @@ put_rq:
unpin_vma:
i915_vma_unpin(vma);
return err;
+
+reloc_err:
+ if (!err)
+ err = -EIO;
+ goto unpin_vma;
}
static int igt_gpu_reloc(void *arg)
@@ -124,6 +122,8 @@ static int igt_gpu_reloc(void *arg)
goto err_scratch;
}
+ intel_gt_pm_get(&eb.i915->gt);
+
for_each_uabi_engine(eb.engine, eb.i915) {
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);
@@ -134,15 +134,29 @@ static int igt_gpu_reloc(void *arg)
err = PTR_ERR(eb.context);
goto err_pm;
}
+ eb.reloc_pool = NULL;
+ eb.reloc_context = NULL;
- err = intel_context_pin(eb.context);
- if (err)
- goto err_put;
+ i915_gem_ww_ctx_init(&eb.ww, false);
+retry:
+ err = intel_context_pin_ww(eb.context, &eb.ww);
+ if (!err) {
+ err = __igt_gpu_reloc(&eb, scratch);
+
+ intel_context_unpin(eb.context);
+ }
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&eb.ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&eb.ww);
- err = __igt_gpu_reloc(&eb, scratch);
+ if (eb.reloc_pool)
+ intel_gt_buffer_pool_put(eb.reloc_pool);
+ if (eb.reloc_context)
+ intel_context_put(eb.reloc_context);
- intel_context_unpin(eb.context);
-err_put:
intel_context_put(eb.context);
err_pm:
intel_engine_pm_put(eb.engine);
@@ -153,6 +167,7 @@ err_pm:
if (igt_flush_test(eb.i915))
err = -EIO;
+ intel_gt_pm_put(&eb.i915->gt);
err_scratch:
i915_gem_object_put(scratch);
return err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 9c7402ce5bf9..d27d87a678c8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
int err;
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
- return err;
+ goto err;
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
- i915_vma_unpin(vma);
- return PTR_ERR(rq);
+ err = PTR_ERR(rq);
+ goto err_unpin;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq,
EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
i915_request_add(rq);
+err_unpin:
i915_vma_unpin(vma);
+err:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
if (err)
return err;
}
@@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
if (IS_ERR(vma)) {
@@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unmap;
}
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
- goto out_unmap;
+ goto out_ww;
rq = i915_request_create(engine->kernel_context);
if (IS_ERR(rq)) {
@@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unpin;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, 0);
- i915_vma_unlock(vma);
err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
i915_request_get(rq);
@@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
out_unpin:
i915_vma_unpin(vma);
+out_ww:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
if (err)
goto out_unmap;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 34932871b3a5..a94243dc4c5c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
}
/* Make the object dirty so that put_pages must do copy back the data */
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index cdc0b9c54305..fd0d24d28763 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
const unsigned int pde,
const struct i915_page_table *pt)
{
+ dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);
+
/* Caller needs to make sure the write completes if necessary */
- iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+ iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
ppgtt->pd_addr + pde);
}
@@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
{
struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
- const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+ const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
unsigned int pde = first_entry / GEN6_PTES;
unsigned int pte = first_entry % GEN6_PTES;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
@@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
- GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
-
num_entries -= count;
GEM_BUG_ON(count > atomic_read(&pt->used));
@@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
- GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+ GEM_BUG_ON(!pd->entry[act_pt]);
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
@@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
mutex_unlock(&ppgtt->flush);
}
-static int gen6_alloc_va_range(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen6_alloc_va_range(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_table *pt, *alloc = NULL;
+ struct i915_page_table *pt;
bool flush = false;
u64 from = start;
unsigned int pde;
- int ret = 0;
spin_lock(&pd->lock);
gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
- if (px_base(pt) == px_base(&vm->scratch[1])) {
+ if (!pt) {
spin_unlock(&pd->lock);
- pt = fetch_and_zero(&alloc);
- if (!pt)
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto unwind_out;
- }
+ pt = stash->pt[0];
+ __i915_gem_object_pin_pages(pt->base);
+ i915_gem_object_make_unshrinkable(pt->base);
- fill32_px(pt, vm->scratch[0].encode);
+ fill32_px(pt, vm->scratch[0]->encode);
spin_lock(&pd->lock);
- if (pd->entry[pde] == &vm->scratch[1]) {
+ if (!pd->entry[pde]) {
+ stash->pt[0] = pt->stash;
+ atomic_set(&pt->used, 0);
pd->entry[pde] = pt;
} else {
- alloc = pt;
pt = pd->entry[pde];
}
@@ -226,38 +223,32 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
gen6_flush_pd(ppgtt, from, start);
}
-
- goto out;
-
-unwind_out:
- gen6_ppgtt_clear_range(vm, from, start - from);
-out:
- if (alloc)
- free_px(vm, alloc);
- return ret;
}
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
struct i915_address_space * const vm = &ppgtt->base.vm;
- struct i915_page_directory * const pd = ppgtt->base.pd;
int ret;
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ ret = setup_scratch_page(vm);
if (ret)
return ret;
- vm->scratch[0].encode =
- vm->pte_encode(px_dma(&vm->scratch[0]),
+ vm->scratch[0]->encode =
+ vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, PTE_READ_ONLY);
- if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
- cleanup_scratch_page(vm);
- return -ENOMEM;
+ vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(vm->scratch[1]))
+ return PTR_ERR(vm->scratch[1]);
+
+ ret = pin_pt_dma(vm, vm->scratch[1]);
+ if (ret) {
+ i915_gem_object_put(vm->scratch[1]);
+ return ret;
}
- fill32_px(&vm->scratch[1], vm->scratch[0].encode);
- memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+ fill32_px(vm->scratch[1], vm->scratch[0]->encode);
return 0;
}
@@ -265,14 +256,12 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch =
- px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
u32 pde;
gen6_for_all_pdes(pt, pd, pde)
- if (px_base(pt) != scratch)
- free_px(&ppgtt->base.vm, pt);
+ if (pt)
+ free_pt(&ppgtt->base.vm, pt);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -286,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
mutex_destroy(&ppgtt->flush);
mutex_destroy(&ppgtt->pin_mutex);
- kfree(ppgtt->base.pd);
+
+ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
static int pd_vma_set_pages(struct i915_vma *vma)
@@ -302,28 +292,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
-static int pd_vma_bind(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 unused)
+static void pd_vma_bind(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct gen6_ppgtt *ppgtt = vma->private;
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
- px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+ ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
- return 0;
}
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
struct gen6_ppgtt *ppgtt = vma->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch =
- px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
unsigned int pde;
@@ -332,11 +320,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
/* Free all no longer used page tables */
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
- if (px_base(pt) == scratch || atomic_read(&pt->used))
+ if (!pt || atomic_read(&pt->used))
continue;
- free_px(&ppgtt->base.vm, pt);
- pd->entry[pde] = scratch;
+ free_pt(&ppgtt->base.vm, pt);
+ pd->entry[pde] = NULL;
}
ppgtt->scan_for_unused_pt = false;
@@ -380,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
return vma;
}
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
int err;
@@ -406,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
*/
err = 0;
if (!atomic_read(&ppgtt->pin_count))
- err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
if (!err)
atomic_inc(&ppgtt->pin_count);
mutex_unlock(&ppgtt->pin_mutex);
@@ -448,6 +436,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
mutex_init(&ppgtt->pin_mutex);
ppgtt_init(&ppgtt->base, gt);
+ ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
@@ -456,9 +445,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+ ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
- ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+ ppgtt->base.pd = __alloc_pd(I915_PDES);
if (!ppgtt->base.pd) {
err = -ENOMEM;
goto err_free;
@@ -479,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
err_scratch:
free_scratch(&ppgtt->base.vm);
err_pd:
- kfree(ppgtt->base.pd);
+ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
mutex_destroy(&ppgtt->pin_mutex);
kfree(ppgtt);
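
gen6_alloc_va_range() can drop its unwind path because allocation failures are now handled before the VA range is touched: the caller allocates and pins a stash of page tables up front, and allocate_va_range() merely consumes the stash and cannot fail. A rough sketch of the caller side, using the i915_vm_*_pt_stash() helpers exactly as the init_aliasing_ppgtt() hunk further down does (the wrapper name itself is invented):

static int prealloc_va_range_example(struct i915_address_space *vm,
				     u64 start, u64 length)
{
	struct i915_vm_pt_stash stash = {};
	int err;

	/* reserve enough page-table objects for the whole range */
	err = i915_vm_alloc_pt_stash(vm, &stash, length);
	if (err)
		return err;

	err = i915_vm_pin_pt_stash(vm, &stash);
	if (!err)
		/* infallible: every table it may need is already in the stash */
		vm->allocate_va_range(vm, &stash, start, length);

	/* hand back whatever the allocator did not use */
	i915_vm_free_pt_stash(vm, &stash);
	return err;
}
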
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
index 72e481806c96..3357228f3304 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -8,12 +8,15 @@
#include "intel_gtt.h"
+struct i915_gem_ww_ctx;
+
struct gen6_ppgtt {
struct i915_ppgtt base;
struct mutex flush;
struct i915_vma *vma;
gen6_pte_t __iomem *pd_addr;
+ u32 pp_dir;
atomic_t pin_count;
struct mutex pin_mutex;
@@ -66,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
(pt = i915_pt_entry(pd, iter), true); \
++iter)
-int gen6_ppgtt_pin(struct i915_ppgtt *base);
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
void gen6_ppgtt_enable(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 699125928272..eb64f474a78c 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
} while (pde++, --count);
}
- free_px(vm, pd);
+ free_px(vm, &pd->pt, lvl);
}
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -199,7 +199,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
struct i915_page_directory * const pd,
u64 start, const u64 end, int lvl)
{
- const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
unsigned int idx, len;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@@ -239,7 +239,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
vaddr = kmap_atomic_px(pt);
memset64(vaddr + gen8_pd_index(start, 0),
- vm->scratch[0].encode,
+ vm->scratch[0]->encode,
count);
kunmap_atomic(vaddr);
@@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
}
if (release_pd_entry(pd, idx, pt, scratch))
- free_px(vm, pt);
+ free_px(vm, pt, lvl);
} while (idx++, --len);
return start;
@@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
start, start + length, vm->top);
}
-static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
- struct i915_page_directory * const pd,
- u64 * const start, const u64 end, int lvl)
+static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_page_directory * const pd,
+ u64 * const start, const u64 end, int lvl)
{
- const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
- struct i915_page_table *alloc = NULL;
unsigned int idx, len;
- int ret = 0;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@@ -297,49 +295,31 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
__func__, vm, lvl + 1, idx);
- pt = fetch_and_zero(&alloc);
- if (lvl) {
- if (!pt) {
- pt = &alloc_pd(vm)->pt;
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto out;
- }
- }
-
- fill_px(pt, vm->scratch[lvl].encode);
- } else {
- if (!pt) {
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto out;
- }
- }
-
- if (intel_vgpu_active(vm->i915) ||
- gen8_pt_count(*start, end) < I915_PDES)
- fill_px(pt, vm->scratch[lvl].encode);
- }
+ pt = stash->pt[!!lvl];
+ __i915_gem_object_pin_pages(pt->base);
+ i915_gem_object_make_unshrinkable(pt->base);
+
+ if (lvl ||
+ gen8_pt_count(*start, end) < I915_PDES ||
+ intel_vgpu_active(vm->i915))
+ fill_px(pt, vm->scratch[lvl]->encode);
spin_lock(&pd->lock);
- if (likely(!pd->entry[idx]))
+ if (likely(!pd->entry[idx])) {
+ stash->pt[!!lvl] = pt->stash;
+ atomic_set(&pt->used, 0);
set_pd_entry(pd, idx, pt);
- else
- alloc = pt, pt = pd->entry[idx];
+ } else {
+ pt = pd->entry[idx];
+ }
}
if (lvl) {
atomic_inc(&pt->used);
spin_unlock(&pd->lock);
- ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
- start, end, lvl);
- if (unlikely(ret)) {
- if (release_pd_entry(pd, idx, pt, scratch))
- free_px(vm, pt);
- goto out;
- }
+ __gen8_ppgtt_alloc(vm, stash,
+ as_pd(pt), start, end, lvl);
spin_lock(&pd->lock);
atomic_dec(&pt->used);
@@ -359,18 +339,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
}
} while (idx++, --len);
spin_unlock(&pd->lock);
-out:
- if (alloc)
- free_px(vm, alloc);
- return ret;
}
-static int gen8_ppgtt_alloc(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen8_ppgtt_alloc(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length)
{
- u64 from;
- int err;
-
GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(range_overflows(start, length, vm->total));
@@ -378,25 +352,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
start >>= GEN8_PTE_SHIFT;
length >>= GEN8_PTE_SHIFT;
GEM_BUG_ON(length == 0);
- from = start;
- err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
- &start, start + length, vm->top);
- if (unlikely(err && from != start))
- __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
- from, start, vm->top);
-
- return err;
-}
-
-static __always_inline void
-write_pte(gen8_pte_t *pte, const gen8_pte_t val)
-{
- /* Magic delays? Or can we refine these to flush all in one pass? */
- *pte = val;
- wmb(); /* cpu to cache */
- clflush(pte); /* cache to memory */
- wmb(); /* visible to all */
+ __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
+ &start, start + length, vm->top);
}
static __always_inline u64
@@ -415,8 +373,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
- write_pte(&vaddr[gen8_pd_index(idx, 0)],
- pte_encode | iter->dma);
+ vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@@ -439,10 +396,12 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = pdp->entry[gen8_pd_index(idx, 2)];
}
+ clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
}
} while (1);
+ clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
return idx;
@@ -498,7 +457,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
- write_pte(&vaddr[index++], encode | iter->dma);
+ vaddr[index++] = encode | iter->dma;
start += page_size;
iter->dma += page_size;
@@ -523,6 +482,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
} while (rem >= page_size && index < I915_PDES);
+ clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
/*
@@ -554,7 +514,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
u16 i;
- encode = vma->vm->scratch[0].encode;
+ encode = vma->vm->scratch[0]->encode;
vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
for (i = 1; i < index; i += 16)
@@ -608,27 +568,37 @@ static int gen8_init_scratch(struct i915_address_space *vm)
GEM_BUG_ON(!clone->has_read_only);
vm->scratch_order = clone->scratch_order;
- memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
- px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
+ for (i = 0; i <= vm->top; i++)
+ vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);
+
return 0;
}
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ ret = setup_scratch_page(vm);
if (ret)
return ret;
- vm->scratch[0].encode =
- gen8_pte_encode(px_dma(&vm->scratch[0]),
+ vm->scratch[0]->encode =
+ gen8_pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_LLC, vm->has_read_only);
for (i = 1; i <= vm->top; i++) {
- if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+ struct drm_i915_gem_object *obj;
+
+ obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(obj))
+ goto free_scratch;
+
+ ret = pin_pt_dma(vm, obj);
+ if (ret) {
+ i915_gem_object_put(obj);
goto free_scratch;
+ }
- fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
- vm->scratch[i].encode =
- gen8_pde_encode(px_dma(&vm->scratch[i]),
- I915_CACHE_LLC);
+ fill_px(obj, vm->scratch[i - 1]->encode);
+ obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
+
+ vm->scratch[i] = obj;
}
return 0;
@@ -649,12 +619,20 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
struct i915_page_directory *pde;
+ int err;
pde = alloc_pd(vm);
if (IS_ERR(pde))
return PTR_ERR(pde);
- fill_px(pde, vm->scratch[1].encode);
+ err = pin_pt_dma(vm, pde->pt.base);
+ if (err) {
+ i915_gem_object_put(pde->pt.base);
+ free_pd(vm, pde);
+ return err;
+ }
+
+ fill_px(pde, vm->scratch[1]->encode);
set_pd_entry(pd, idx, pde);
atomic_inc(px_used(pde)); /* keep pinned */
}
@@ -668,21 +646,32 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
{
const unsigned int count = gen8_pd_top_count(vm);
struct i915_page_directory *pd;
+ int err;
- GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+ GEM_BUG_ON(count > I915_PDES);
- pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+ pd = __alloc_pd(count);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_page_dma(vm, px_base(pd)))) {
- kfree(pd);
- return ERR_PTR(-ENOMEM);
+ pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(pd->pt.base)) {
+ err = PTR_ERR(pd->pt.base);
+ pd->pt.base = NULL;
+ goto err_pd;
}
- fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+ err = pin_pt_dma(vm, pd->pt.base);
+ if (err)
+ goto err_pd;
+
+ fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
atomic_inc(px_used(pd)); /* mark as pinned */
return pd;
+
+err_pd:
+ free_pd(vm, pd);
+ return ERR_PTR(err);
}
/*
@@ -703,6 +692,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt_init(ppgtt, gt);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+ ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
/*
* From bdw, there is hw support for read-only pages in the PPGTT.
@@ -714,12 +704,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
*/
ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
- /*
- * There are only few exceptions for gen >=6. chv and bxt.
- * And we are not sure about the latter so play safe for now.
- */
- if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
- ppgtt->vm.pt_kmap_wc = true;
+ ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
err = gen8_init_scratch(&ppgtt->vm);
if (err)
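
In both the gen6 and gen8 allocators the stash acts as a pair of free lists, one per table level, linked through pt->stash; handing out a preallocated table is just popping the head of that list. A minimal sketch of the pop using only the fields visible above (the helper name is hypothetical, and the real code defers the unlink until it has confirmed, under pd->lock, that the target slot is still empty):

static struct i915_page_table *
stash_pop_example(struct i915_vm_pt_stash *stash, int lvl)
{
	/* pt[0] feeds page tables, pt[1] feeds page directories */
	struct i915_page_table *pt = stash->pt[!!lvl];

	stash->pt[!!lvl] = pt->stash;	/* unlink from the free list */
	atomic_set(&pt->used, 0);	/* fresh table: no live entries yet */
	return pt;
}
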
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 91786310c114..d8b206e53660 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,6 +28,8 @@
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_breadcrumbs.h"
+#include "intel_context.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@@ -53,33 +55,65 @@ static void irq_disable(struct intel_engine_cs *engine)
spin_unlock(&engine->gt->irq_lock);
}
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
+ lockdep_assert_held(&b->irq_lock);
+
+ if (!b->irq_engine || b->irq_armed)
+ return;
+
+ if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+ return;
+
+ /*
+ * The breadcrumb irq will be disarmed on the interrupt after the
+ * waiters are signaled. This gives us a single interrupt window in
+ * which we can add a new waiter and avoid the cost of re-enabling
+ * the irq.
+ */
+ WRITE_ONCE(b->irq_armed, true);
+
+ /*
+ * Since we are waiting on a request, the GPU should be busy
+ * and should have its own rpm reference. This is tracked
+ * by i915->gt.awake, we can forgo holding our own wakref
+ * for the interrupt as before i915->gt.awake is released (when
+ * the driver is idle) we disarm the breadcrumbs.
+ */
+ if (!b->irq_enabled++)
+ irq_enable(b->irq_engine);
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
lockdep_assert_held(&b->irq_lock);
+ if (!b->irq_engine || !b->irq_armed)
+ return;
+
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
- irq_disable(engine);
+ irq_disable(b->irq_engine);
WRITE_ONCE(b->irq_armed, false);
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async(b->irq_engine->gt);
}
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+static void add_signaling_context(struct intel_breadcrumbs *b,
+ struct intel_context *ce)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- unsigned long flags;
-
- if (!READ_ONCE(b->irq_armed))
- return;
+ intel_context_get(ce);
+ list_add_tail(&ce->signal_link, &b->signalers);
+ if (list_is_first(&ce->signal_link, &b->signalers))
+ __intel_breadcrumbs_arm_irq(b);
+}
- spin_lock_irqsave(&b->irq_lock, flags);
- if (b->irq_armed)
- __intel_breadcrumbs_disarm_irq(b);
- spin_unlock_irqrestore(&b->irq_lock, flags);
+static void remove_signaling_context(struct intel_breadcrumbs *b,
+ struct intel_context *ce)
+{
+ list_del(&ce->signal_link);
+ intel_context_put(ce);
}
static inline bool __request_completed(const struct i915_request *rq)
@@ -90,6 +124,9 @@ static inline bool __request_completed(const struct i915_request *rq)
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
+ if (rq->context != ce)
+ return false;
+
if (!list_is_last(&rq->signal_link, &ce->signals) &&
i915_seqno_passed(rq->fence.seqno,
list_next_entry(rq, signal_link)->fence.seqno))
@@ -133,25 +170,21 @@ __dma_fence_signal__notify(struct dma_fence *fence,
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
-
- if (unlikely(intel_engine_is_virtual(engine)))
- engine = intel_virtual_engine_get_sibling(engine, 0);
-
- intel_engine_add_retire(engine, tl);
+ if (b->irq_engine)
+ intel_engine_add_retire(b->irq_engine, tl);
}
-static void __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq, struct list_head *signals)
{
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- if (!__dma_fence_signal(&rq->fence))
- return;
+ if (!__dma_fence_signal(&rq->fence)) {
+ i915_request_put(rq);
+ return false;
+ }
- i915_request_get(rq);
list_add_tail(&rq->signal_link, signals);
+ return true;
}
static void signal_irq_work(struct irq_work *work)
@@ -164,7 +197,7 @@ static void signal_irq_work(struct irq_work *work)
spin_lock(&b->irq_lock);
- if (b->irq_armed && list_empty(&b->signalers))
+ if (list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
list_splice_init(&b->signaled_requests, &signal);
@@ -197,8 +230,8 @@ static void signal_irq_work(struct irq_work *work)
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
if (&ce->signals == pos) { /* now empty */
- list_del_init(&ce->signal_link);
add_retire(b, ce->timeline);
+ remove_signaling_context(b, ce);
}
}
}
@@ -220,116 +253,89 @@ static void signal_irq_work(struct irq_work *work)
}
}
-static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+struct intel_breadcrumbs *
+intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
-
- lockdep_assert_held(&b->irq_lock);
- if (b->irq_armed)
- return true;
-
- if (!intel_gt_pm_get_if_awake(engine->gt))
- return false;
-
- /*
- * The breadcrumb irq will be disarmed on the interrupt after the
- * waiters are signaled. This gives us a single interrupt window in
- * which we can add a new waiter and avoid the cost of re-enabling
- * the irq.
- */
- WRITE_ONCE(b->irq_armed, true);
-
- /*
- * Since we are waiting on a request, the GPU should be busy
- * and should have its own rpm reference. This is tracked
- * by i915->gt.awake, we can forgo holding our own wakref
- * for the interrupt as before i915->gt.awake is released (when
- * the driver is idle) we disarm the breadcrumbs.
- */
-
- if (!b->irq_enabled++)
- irq_enable(engine);
+ struct intel_breadcrumbs *b;
- return true;
-}
-
-void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ b = kzalloc(sizeof(*b), GFP_KERNEL);
+ if (!b)
+ return NULL;
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
+
+ b->irq_engine = irq_engine;
+
+ return b;
}
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
+ if (!b->irq_engine)
+ return;
+
spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_enabled)
- irq_enable(engine);
+ irq_enable(b->irq_engine);
else
- irq_disable(engine);
+ irq_disable(b->irq_engine);
spin_unlock_irqrestore(&b->irq_lock, flags);
}
-void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
- struct intel_context *ce)
+void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
- spin_lock_irqsave(&b->irq_lock, flags);
- if (!list_empty(&ce->signals)) {
- struct i915_request *rq, *next;
-
- /* Queue for executing the signal callbacks in the irq_work */
- list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
- GEM_BUG_ON(rq->engine != engine);
- GEM_BUG_ON(!__request_completed(rq));
-
- __signal_request(rq, &b->signaled_requests);
- }
+ if (!READ_ONCE(b->irq_armed))
+ return;
- INIT_LIST_HEAD(&ce->signals);
- list_del_init(&ce->signal_link);
+ spin_lock_irqsave(&b->irq_lock, flags);
+ __intel_breadcrumbs_disarm_irq(b);
+ spin_unlock_irqrestore(&b->irq_lock, flags);
+ if (!list_empty(&b->signalers))
irq_work_queue(&b->irq_work);
- }
- spin_unlock_irqrestore(&b->irq_lock, flags);
}
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
+void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
+ kfree(b);
}
-bool i915_request_enable_breadcrumb(struct i915_request *rq)
+static void insert_breadcrumb(struct i915_request *rq,
+ struct intel_breadcrumbs *b)
{
- lockdep_assert_held(&rq->lock);
-
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
- return true;
+ struct intel_context *ce = rq->context;
+ struct list_head *pos;
- if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- struct intel_context *ce = rq->context;
- struct list_head *pos;
-
- spin_lock(&b->irq_lock);
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ return;
- if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
- goto unlock;
+ i915_request_get(rq);
- if (!__intel_breadcrumbs_arm_irq(b))
- goto unlock;
+ /*
+ * If the request is already completed, we can transfer it
+ * straight onto a signaled list, and queue the irq worker for
+ * its signal completion.
+ */
+ if (__request_completed(rq)) {
+ if (__signal_request(rq, &b->signaled_requests))
+ irq_work_queue(&b->irq_work);
+ return;
+ }
+ if (list_empty(&ce->signals)) {
+ add_signaling_context(b, ce);
+ pos = &ce->signals;
+ } else {
/*
* We keep the seqno in retirement order, so we can break
* inside intel_engine_signal_breadcrumbs as soon as we've
@@ -351,24 +357,75 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
break;
}
- list_add(&rq->signal_link, pos);
- if (pos == &ce->signals) /* catch transitions from empty list */
- list_move_tail(&ce->signal_link, &b->signalers);
- GEM_BUG_ON(!check_signal_order(ce, rq));
+ }
+ list_add(&rq->signal_link, pos);
+ GEM_BUG_ON(!check_signal_order(ce, rq));
+ set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ /* Check after attaching to irq, interrupt may have already fired. */
+ if (__request_completed(rq))
+ irq_work_queue(&b->irq_work);
+}
- set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-unlock:
+bool i915_request_enable_breadcrumb(struct i915_request *rq)
+{
+ struct intel_breadcrumbs *b;
+
+ /* Serialises with i915_request_retire() using rq->lock */
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
+
+ /*
+ * Peek at i915_request_submit()/i915_request_unsubmit() status.
+ *
+ * If the request is not yet active (and not signaled), we will
+ * attach the breadcrumb later.
+ */
+ if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ return true;
+
+ /*
+ * rq->engine is locked by rq->engine->active.lock. That however
+ * is not known until after rq->engine has been dereferenced and
+ * the lock acquired. Hence we acquire the lock and then validate
+ * that rq->engine still matches the lock we hold for it.
+ *
+ * Here, we are using the breadcrumb lock as a proxy for the
+ * rq->engine->active.lock, and we know that since the breadcrumb
+ * will be serialised within i915_request_submit/i915_request_unsubmit,
+ * the engine cannot change while active as long as we hold the
+ * breadcrumb lock on that engine.
+ *
+ * From the dma_fence_enable_signaling() path, we are outside of the
+ * request submit/unsubmit path, and so we must be more careful to
+ * acquire the right lock.
+ */
+ b = READ_ONCE(rq->engine)->breadcrumbs;
+ spin_lock(&b->irq_lock);
+ while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
spin_unlock(&b->irq_lock);
+ b = READ_ONCE(rq->engine)->breadcrumbs;
+ spin_lock(&b->irq_lock);
}
- return !__request_completed(rq);
+ /*
+ * Now that we are finally serialised with request submit/unsubmit,
+ * [with b->irq_lock] and with i915_request_retire() [via checking
+ * SIGNALED with rq->lock] confirm the request is indeed active. If
+ * it is no longer active, the breadcrumb will be attached upon
+ * i915_request_submit().
+ */
+ if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ insert_breadcrumb(rq, b);
+
+ spin_unlock(&b->irq_lock);
+
+ return true;
}
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
- lockdep_assert_held(&rq->lock);
+ struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
/*
* We must wait for b->irq_lock so that we know the interrupt handler
@@ -382,23 +439,19 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
- list_del_init(&ce->signal_link);
+ remove_signaling_context(b, ce);
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ i915_request_put(rq);
}
spin_unlock(&b->irq_lock);
}
-void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
- struct drm_printer *p)
+static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_context *ce;
struct i915_request *rq;
- if (list_empty(&b->signalers))
- return;
-
drm_printf(p, "Signals:\n");
spin_lock_irq(&b->irq_lock);
@@ -414,3 +467,17 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
}
spin_unlock_irq(&b->irq_lock);
}
+
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+ struct drm_printer *p)
+{
+ struct intel_breadcrumbs *b;
+
+ b = engine->breadcrumbs;
+ if (!b)
+ return;
+
+ drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
+ if (!list_empty(&b->signalers))
+ print_signals(b, p);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
new file mode 100644
index 000000000000..ed3d1deabfbd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BREADCRUMBS__
+#define __INTEL_BREADCRUMBS__
+
+#include <linux/irq_work.h>
+
+#include "intel_engine_types.h"
+
+struct drm_printer;
+struct i915_request;
+struct intel_breadcrumbs;
+
+struct intel_breadcrumbs *
+intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
+void intel_breadcrumbs_free(struct intel_breadcrumbs *b);
+
+void intel_breadcrumbs_reset(struct intel_breadcrumbs *b);
+void intel_breadcrumbs_park(struct intel_breadcrumbs *b);
+
+static inline void
+intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
+{
+ irq_work_queue(&engine->breadcrumbs->irq_work);
+}
+
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+ struct drm_printer *p);
+
+bool i915_request_enable_breadcrumb(struct i915_request *request);
+void i915_request_cancel_breadcrumb(struct i915_request *request);
+
+#endif /* __INTEL_BREADCRUMBS__ */
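
With breadcrumbs split out into a separately allocated object, the per-engine lifecycle reduces to create/park/free, as the engine_setup_common() and intel_engine_cleanup_common() hunks later in this diff show. A condensed, illustrative pairing (the function names are invented); judging by the "Not all breadcrumbs are attached to physical HW" comment and the !b->irq_engine checks above, callers without a physical interrupt source presumably pass a NULL irq_engine to intel_breadcrumbs_create():

static int engine_breadcrumbs_setup_example(struct intel_engine_cs *engine)
{
	engine->breadcrumbs = intel_breadcrumbs_create(engine);
	if (!engine->breadcrumbs)
		return -ENOMEM;

	return 0;
}

static void engine_breadcrumbs_teardown_example(struct intel_engine_cs *engine)
{
	/* by now the engine is parked and no signalers remain */
	intel_breadcrumbs_free(engine->breadcrumbs);
	engine->breadcrumbs = NULL;
}
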
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
new file mode 100644
index 000000000000..8e53b9942695
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BREADCRUMBS_TYPES__
+#define __INTEL_BREADCRUMBS_TYPES__
+
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/*
+ * Rather than have every client wait upon all user interrupts,
+ * with the herd waking after every interrupt and each doing the
+ * heavyweight seqno dance, we delegate the task (of being the
+ * bottom-half of the user interrupt) to the first client. After
+ * every interrupt, we wake up one client, who does the heavyweight
+ * coherent seqno read and either goes back to sleep (if incomplete),
+ * or wakes up all the completed clients in parallel, before then
+ * transferring the bottom-half status to the next client in the queue.
+ *
+ * Compared to walking the entire list of waiters in a single dedicated
+ * bottom-half, we reduce the latency of the first waiter by avoiding
+ * a context switch, but incur additional coherent seqno reads when
+ * following the chain of request breadcrumbs. Since it is most likely
+ * that we have a single client waiting on each seqno, then reducing
+ * the overhead of waking that client is much preferred.
+ */
+struct intel_breadcrumbs {
+ spinlock_t irq_lock; /* protects the lists used in hardirq context */
+
+ /* Not all breadcrumbs are attached to physical HW */
+ struct intel_engine_cs *irq_engine;
+
+ struct list_head signalers;
+ struct list_head signaled_requests;
+
+ struct irq_work irq_work; /* for use from inside irq_lock */
+
+ unsigned int irq_enabled;
+
+ bool irq_armed;
+};
+
+#endif /* __INTEL_BREADCRUMBS_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 52db2bde44a3..d301dda1b261 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,57 +93,210 @@ static void intel_context_active_release(struct intel_context *ce)
i915_active_release(&ce->active);
}
-int __intel_context_do_pin(struct intel_context *ce)
+static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
+{
+ unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
+ int err;
+
+ err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
+ if (err)
+ return err;
+
+ err = i915_active_acquire(&vma->active);
+ if (err)
+ goto err_unpin;
+
+ /*
+ * And mark it as a globally pinned object to let the shrinker know
+ * it cannot reclaim the object until we release it.
+ */
+ i915_vma_make_unshrinkable(vma);
+ vma->obj->mm.dirty = true;
+
+ return 0;
+
+err_unpin:
+ i915_vma_unpin(vma);
+ return err;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+ i915_vma_make_shrinkable(vma);
+ i915_active_release(&vma->active);
+ __i915_vma_unpin(vma);
+}
+
+static int __ring_active(struct intel_ring *ring,
+ struct i915_gem_ww_ctx *ww)
+{
+ int err;
+
+ err = intel_ring_pin(ring, ww);
+ if (err)
+ return err;
+
+ err = i915_active_acquire(&ring->vma->active);
+ if (err)
+ goto err_pin;
+
+ return 0;
+
+err_pin:
+ intel_ring_unpin(ring);
+ return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+ i915_active_release(&ring->vma->active);
+ intel_ring_unpin(ring);
+}
+
+static int intel_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
{
int err;
+ CE_TRACE(ce, "active\n");
+
+ err = __ring_active(ce->ring, ww);
+ if (err)
+ return err;
+
+ err = intel_timeline_pin(ce->timeline, ww);
+ if (err)
+ goto err_ring;
+
+ if (!ce->state)
+ return 0;
+
+ err = __context_pin_state(ce->state, ww);
+ if (err)
+ goto err_timeline;
+
+
+ return 0;
+
+err_timeline:
+ intel_timeline_unpin(ce->timeline);
+err_ring:
+ __ring_retire(ce->ring);
+ return err;
+}
+
+static void intel_context_post_unpin(struct intel_context *ce)
+{
+ if (ce->state)
+ __context_unpin_state(ce->state);
+
+ intel_timeline_unpin(ce->timeline);
+ __ring_retire(ce->ring);
+}
+
+int __intel_context_do_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
+{
+ bool handoff = false;
+ void *vaddr;
+ int err = 0;
+
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
err = intel_context_alloc_state(ce);
if (err)
return err;
}
- err = i915_active_acquire(&ce->active);
+ /*
+ * We always pin the context/ring/timeline here, to ensure a pin
+ * refcount for __intel_context_active(), which prevents a lock
+ * inversion of ce->pin_mutex vs dma_resv_lock().
+ */
+
+ err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
+ if (!err && ce->ring->vma->obj)
+ err = i915_gem_object_lock(ce->ring->vma->obj, ww);
+ if (!err && ce->state)
+ err = i915_gem_object_lock(ce->state->obj, ww);
+ if (!err)
+ err = intel_context_pre_pin(ce, ww);
if (err)
return err;
- if (mutex_lock_interruptible(&ce->pin_mutex)) {
- err = -EINTR;
- goto out_release;
- }
+ err = i915_active_acquire(&ce->active);
+ if (err)
+ goto err_ctx_unpin;
+
+ err = ce->ops->pre_pin(ce, ww, &vaddr);
+ if (err)
+ goto err_release;
+
+ err = mutex_lock_interruptible(&ce->pin_mutex);
+ if (err)
+ goto err_post_unpin;
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
- goto out_unlock;
+ goto err_unlock;
}
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
- goto out_unlock;
+ goto err_unlock;
- err = ce->ops->pin(ce);
- if (unlikely(err))
- goto err_active;
+ err = ce->ops->pin(ce, vaddr);
+ if (err) {
+ intel_context_active_release(ce);
+ goto err_unlock;
+ }
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
i915_ggtt_offset(ce->ring->vma),
ce->ring->head, ce->ring->tail);
+ handoff = true;
smp_mb__before_atomic(); /* flush pin before it is visible */
atomic_inc(&ce->pin_count);
}
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
- goto out_unlock;
-err_active:
- intel_context_active_release(ce);
-out_unlock:
+err_unlock:
mutex_unlock(&ce->pin_mutex);
-out_release:
+err_post_unpin:
+ if (!handoff)
+ ce->ops->post_unpin(ce);
+err_release:
i915_active_release(&ce->active);
+err_ctx_unpin:
+ intel_context_post_unpin(ce);
+
+ /*
+ * Unlock the hwsp_ggtt object since it's shared.
+ * In principle we can unlock all the global state locked above
+ * since it's pinned and doesn't need fencing, and will
+ * thus remain resident until it is explicitly unpinned.
+ */
+ i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);
+
+ return err;
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = __intel_context_do_pin_ww(ce, &ww);
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
return err;
}
@@ -154,6 +307,7 @@ void intel_context_unpin(struct intel_context *ce)
CE_TRACE(ce, "unpin\n");
ce->ops->unpin(ce);
+ ce->ops->post_unpin(ce);
/*
* Once released, we may asynchronously drop the active reference.
@@ -166,65 +320,6 @@ void intel_context_unpin(struct intel_context *ce)
intel_context_put(ce);
}
-static int __context_pin_state(struct i915_vma *vma)
-{
- unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
- int err;
-
- err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
- if (err)
- return err;
-
- err = i915_active_acquire(&vma->active);
- if (err)
- goto err_unpin;
-
- /*
- * And mark it as a globally pinned object to let the shrinker know
- * it cannot reclaim the object until we release it.
- */
- i915_vma_make_unshrinkable(vma);
- vma->obj->mm.dirty = true;
-
- return 0;
-
-err_unpin:
- i915_vma_unpin(vma);
- return err;
-}
-
-static void __context_unpin_state(struct i915_vma *vma)
-{
- i915_vma_make_shrinkable(vma);
- i915_active_release(&vma->active);
- __i915_vma_unpin(vma);
-}
-
-static int __ring_active(struct intel_ring *ring)
-{
- int err;
-
- err = intel_ring_pin(ring);
- if (err)
- return err;
-
- err = i915_active_acquire(&ring->vma->active);
- if (err)
- goto err_pin;
-
- return 0;
-
-err_pin:
- intel_ring_unpin(ring);
- return err;
-}
-
-static void __ring_retire(struct intel_ring *ring)
-{
- i915_active_release(&ring->vma->active);
- intel_ring_unpin(ring);
-}
-
__i915_active_call
static void __intel_context_retire(struct i915_active *active)
{
@@ -235,48 +330,29 @@ static void __intel_context_retire(struct i915_active *active)
intel_context_get_avg_runtime_ns(ce));
set_bit(CONTEXT_VALID_BIT, &ce->flags);
- if (ce->state)
- __context_unpin_state(ce->state);
-
- intel_timeline_unpin(ce->timeline);
- __ring_retire(ce->ring);
-
+ intel_context_post_unpin(ce);
intel_context_put(ce);
}
static int __intel_context_active(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
- int err;
-
- CE_TRACE(ce, "active\n");
intel_context_get(ce);
- err = __ring_active(ce->ring);
- if (err)
- goto err_put;
+ /* everything should already be activated by intel_context_pre_pin() */
+ GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
+ __intel_ring_pin(ce->ring);
- err = intel_timeline_pin(ce->timeline);
- if (err)
- goto err_ring;
+ __intel_timeline_pin(ce->timeline);
- if (!ce->state)
- return 0;
-
- err = __context_pin_state(ce->state);
- if (err)
- goto err_timeline;
+ if (ce->state) {
+ GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
+ __i915_vma_pin(ce->state);
+ i915_vma_make_unshrinkable(ce->state);
+ }
return 0;
-
-err_timeline:
- intel_timeline_unpin(ce->timeline);
-err_ring:
- __ring_retire(ce->ring);
-err_put:
- intel_context_put(ce);
- return err;
}
void
@@ -382,15 +458,37 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err;
- err = intel_context_pin(ce);
- if (unlikely(err))
- return ERR_PTR(err);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = intel_context_pin_ww(ce, &ww);
+ if (!err) {
+ rq = i915_request_create(ce);
+ intel_context_unpin(ce);
+ } else if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ } else {
+ rq = ERR_PTR(err);
+ }
+
+ i915_gem_ww_ctx_fini(&ww);
- rq = i915_request_create(ce);
- intel_context_unpin(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ /*
+ * timeline->mutex should be the inner lock, but is used as outer lock.
+ * Hack around this to shut up lockdep in selftests..
+ */
+ lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
+ mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
+ mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+ rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
return rq;
}
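
intel_context_create_request() above is the template for callers that need a context pinned alongside other GEM objects: intel_context_pin_ww() joins the same ww acquire class as i915_gem_object_lock(), so the whole set can be backed off and retried as one transaction. A hedged sketch of such a caller (the function name is invented; the calls are the ones introduced in this file and declared in intel_context.h below):

static int pin_ctx_with_object_example(struct intel_context *ce,
				       struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true); /* true: interruptible waits */
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		/* ... build and add a request using ce and obj ... */
		intel_context_unpin(ce);
	}
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
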
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 07be021882cc..fda2eba81e22 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -25,6 +25,8 @@
##__VA_ARGS__); \
} while (0)
+struct i915_gem_ww_ctx;
+
void intel_context_init(struct intel_context *ce,
struct intel_engine_cs *engine);
void intel_context_fini(struct intel_context *ce);
@@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
}
int __intel_context_do_pin(struct intel_context *ce);
+int __intel_context_do_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww);
static inline bool intel_context_pin_if_active(struct intel_context *ce)
{
@@ -95,6 +99,15 @@ static inline int intel_context_pin(struct intel_context *ce)
return __intel_context_do_pin(ce);
}
+static inline int intel_context_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
+{
+ if (likely(intel_context_pin_if_active(ce)))
+ return 0;
+
+ return __intel_context_do_pin_ww(ce, ww);
+}
+
static inline void __intel_context_pin(struct intel_context *ce)
{
GEM_BUG_ON(!intel_context_is_pinned(ce));
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4954b0df4864..552cb57a2e8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -23,6 +23,7 @@
DECLARE_EWMA(runtime, 3, 8);
struct i915_gem_context;
+struct i915_gem_ww_ctx;
struct i915_vma;
struct intel_context;
struct intel_ring;
@@ -30,8 +31,10 @@ struct intel_ring;
struct intel_context_ops {
int (*alloc)(struct intel_context *ce);
- int (*pin)(struct intel_context *ce);
+ int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
+ int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
+ void (*post_unpin)(struct intel_context *ce);
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);
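
The pin hook is now split around the ww-locked phase: pre_pin() runs with the relevant dma_resv locks held through the ww context and may hand a prepared mapping over via *vaddr, pin() completes the operation with that mapping, and post_unpin() undoes whatever pre_pin() set up once unpin() has run. The skeleton below is purely illustrative (all example_* names are invented) and only shows the shape and call order a backend is expected to follow:

static int example_pre_pin(struct intel_context *ce,
			   struct i915_gem_ww_ctx *ww, void **vaddr)
{
	/* ww locks are held here; prepare whatever pin() will need */
	*vaddr = NULL;
	return 0;
}

static int example_pin(struct intel_context *ce, void *vaddr)
{
	/* complete the pin using the mapping prepared by pre_pin() */
	return 0;
}

static void example_unpin(struct intel_context *ce)
{
	/* mirror of pin() */
}

static void example_post_unpin(struct intel_context *ce)
{
	/* mirror of pre_pin(): release the prepared mapping */
}

static const struct intel_context_ops example_context_ops = {
	.pre_pin	= example_pre_pin,
	.pin		= example_pin,
	.unpin		= example_unpin,
	.post_unpin	= example_post_unpin,
};
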
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index a9249a23903a..08e2c000dcc3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -223,26 +223,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
void intel_engine_init_execlists(struct intel_engine_cs *engine);
-void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void
-intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
- irq_work_queue(&engine->breadcrumbs.irq_work);
-}
-
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-
-void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
- struct intel_context *ce);
-
-void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
- struct drm_printer *p);
-
static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 26087dd79782..5bfb5f7ed02c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,6 +28,7 @@
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
@@ -634,7 +635,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
else
flags = PIN_HIGH;
- return i915_ggtt_pin(vma, 0, flags);
+ return i915_ggtt_pin(vma, NULL, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
@@ -700,8 +701,13 @@ static int engine_setup_common(struct intel_engine_cs *engine)
if (err)
return err;
+ engine->breadcrumbs = intel_breadcrumbs_create(engine);
+ if (!engine->breadcrumbs) {
+ err = -ENOMEM;
+ goto err_status;
+ }
+
intel_engine_init_active(engine, ENGINE_PHYSICAL);
- intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
@@ -716,6 +722,10 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_ctx_wa(engine);
return 0;
+
+err_status:
+ cleanup_status_page(engine);
+ return err;
}
struct measure_breadcrumb {
@@ -785,9 +795,11 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
}
static struct intel_context *
-create_kernel_context(struct intel_engine_cs *engine)
+create_pinned_context(struct intel_engine_cs *engine,
+ unsigned int hwsp,
+ struct lock_class_key *key,
+ const char *name)
{
- static struct lock_class_key kernel;
struct intel_context *ce;
int err;
@@ -796,6 +808,7 @@ create_kernel_context(struct intel_engine_cs *engine)
return ce;
__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+ ce->timeline = page_pack_bits(NULL, hwsp);
err = intel_context_pin(ce); /* perma-pin so it is always available */
if (err) {
@@ -809,11 +822,20 @@ create_kernel_context(struct intel_engine_cs *engine)
* should we need to inject GPU operations during their request
* construction.
*/
- lockdep_set_class(&ce->timeline->mutex, &kernel);
+ lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
return ce;
}
+static struct intel_context *
+create_kernel_context(struct intel_engine_cs *engine)
+{
+ static struct lock_class_key kernel;
+
+ return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
+ &kernel, "kernel_context");
+}
+
/**
* intel_engines_init_common - initialize engine state which might require hw access
* @engine: Engine to initialize.
@@ -902,9 +924,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
+ intel_breadcrumbs_free(engine->breadcrumbs);
intel_engine_fini_retire(engine);
- intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
if (engine->default_state)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8ec3eecf3e39..f7b2e07e2229 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -6,6 +6,7 @@
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
@@ -247,7 +248,7 @@ static int __engine_park(struct intel_wakeref *wf)
call_idle_barriers(engine); /* cleanup after wedging */
intel_engine_park_heartbeat(engine);
- intel_engine_disarm_breadcrumbs(engine);
+ intel_breadcrumbs_park(engine->breadcrumbs);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8de92fd7d392..c400aaa2287b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,6 +22,7 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
+#include "intel_breadcrumbs_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
@@ -373,34 +374,8 @@ struct intel_engine_cs {
*/
struct ewma__engine_latency latency;
- /* Rather than have every client wait upon all user interrupts,
- * with the herd waking after every interrupt and each doing the
- * heavyweight seqno dance, we delegate the task (of being the
- * bottom-half of the user interrupt) to the first client. After
- * every interrupt, we wake up one client, who does the heavyweight
- * coherent seqno read and either goes back to sleep (if incomplete),
- * or wakes up all the completed clients in parallel, before then
- * transferring the bottom-half status to the next client in the queue.
- *
- * Compared to walking the entire list of waiters in a single dedicated
- * bottom-half, we reduce the latency of the first waiter by avoiding
- * a context switch, but incur additional coherent seqno reads when
- * following the chain of request breadcrumbs. Since it is most likely
- * that we have a single client waiting on each seqno, then reducing
- * the overhead of waking that client is much preferred.
- */
- struct intel_breadcrumbs {
- spinlock_t irq_lock;
- struct list_head signalers;
-
- struct list_head signaled_requests;
-
- struct irq_work irq_work; /* for use from inside irq_lock */
-
- unsigned int irq_enabled;
-
- bool irq_armed;
- } breadcrumbs;
+ /* Keep track of all the seqno used, a trail of breadcrumbs */
+ struct intel_breadcrumbs *breadcrumbs;
struct intel_engine_pmu {
/**
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 99e28d9021e8..81c05f551b9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -78,8 +78,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
int ret;
- stash_init(&i915->mm.wc_stash);
-
/*
* Note that we use page colouring to enforce a guard page at the
* end of the address space. This is required as the CS may prefetch
@@ -232,7 +230,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
/* Fill the allocated but "unused" space beyond the end of the buffer */
while (gte < end)
- gen8_set_pte(gte++, vm->scratch[0].encode);
+ gen8_set_pte(gte++, vm->scratch[0]->encode);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -283,7 +281,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
/* Fill the allocated but "unused" space beyond the end of the buffer */
while (gte < end)
- iowrite32(vm->scratch[0].encode, gte++);
+ iowrite32(vm->scratch[0]->encode, gte++);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -303,7 +301,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
- const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
gen8_pte_t __iomem *gtt_base =
(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
@@ -401,7 +399,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = vm->scratch[0].encode;
+ scratch_pte = vm->scratch[0]->encode;
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, &gtt_base[i]);
}
@@ -436,16 +434,17 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}
-static int ggtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void ggtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
- return 0;
+ return;
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
pte_flags = 0;
@@ -454,8 +453,6 @@ static int ggtt_bind_vma(struct i915_address_space *vm,
vm->insert_entries(vm, vma, cache_level, pte_flags);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-
- return 0;
}
static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@@ -568,31 +565,25 @@ err:
return ret;
}
-static int aliasing_gtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
u32 pte_flags;
- int ret;
/* Currently applicable only to VLV */
pte_flags = 0;
if (i915_gem_object_is_readonly(vma->obj))
pte_flags |= PTE_READ_ONLY;
- if (flags & I915_VMA_LOCAL_BIND) {
- struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias;
-
- ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags);
- if (ret)
- return ret;
- }
+ if (flags & I915_VMA_LOCAL_BIND)
+ ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
+ stash, vma, cache_level, flags);
if (flags & I915_VMA_GLOBAL_BIND)
vm->insert_entries(vm, vma, cache_level, pte_flags);
-
- return 0;
}
static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
@@ -607,6 +598,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
+ struct i915_vm_pt_stash stash = {};
struct i915_ppgtt *ppgtt;
int err;
@@ -619,15 +611,21 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
goto err_ppgtt;
}
+ err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
+ if (err)
+ goto err_ppgtt;
+
+ err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
+ if (err)
+ goto err_stash;
+
/*
* Note we only pre-allocate as far as the end of the global
* GTT. On 48b / 4-level page-tables, the difference is very,
* very significant! We have to preallocate as GVT/vgpu does
* not like the page directory disappearing.
*/
- err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
- if (err)
- goto err_ppgtt;
+ ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
ggtt->alias = ppgtt;
ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
@@ -638,8 +636,11 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
return 0;
+err_stash:
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
i915_vm_put(&ppgtt->vm);
return err;
@@ -715,18 +716,11 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
struct i915_ggtt *ggtt = &i915->ggtt;
- struct pagevec *pvec;
fini_aliasing_ppgtt(ggtt);
intel_ggtt_fini_fences(ggtt);
ggtt_cleanup_hw(ggtt);
-
- pvec = &i915->mm.wc_stash.pvec;
- if (pvec->nr) {
- set_pages_array_wb(pvec->pages, pvec->nr);
- __pagevec_release(pvec);
- }
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -789,7 +783,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return -ENOMEM;
}
- ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+ ret = setup_scratch_page(&ggtt->vm);
if (ret) {
drm_err(&i915->drm, "Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
@@ -797,8 +791,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return ret;
}
- ggtt->vm.scratch[0].encode =
- ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+ ggtt->vm.scratch[0]->encode =
+ ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
I915_CACHE_NONE, 0);
return 0;
@@ -824,7 +818,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
iounmap(ggtt->gsm);
- cleanup_scratch_page(vm);
+ free_scratch(vm);
}
static struct resource pci_resource(struct pci_dev *pdev, int bar)
@@ -852,6 +846,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
else
size = gen8_get_total_gtt_size(snb_gmch_ctl);
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->vm.insert_page = gen8_ggtt_insert_page;
@@ -1000,6 +996,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
size = gen6_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
ggtt->vm.clear_range = gen6_ggtt_clear_range;
@@ -1050,6 +1048,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->gmadr =
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->do_idle_maps = needs_idle_maps(i915);
ggtt->vm.insert_page = i915_ggtt_insert_page;
ggtt->vm.insert_entries = i915_ggtt_insert_entries;
@@ -1165,11 +1165,6 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
ggtt->invalidate(ggtt);
}
-static unsigned int clear_bind(struct i915_vma *vma)
-{
- return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags);
-}
-
void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
struct i915_vma *vma;
@@ -1187,11 +1182,13 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
- unsigned int was_bound = clear_bind(vma);
+ unsigned int was_bound =
+ atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
- WARN_ON(i915_vma_bind(vma,
- obj ? obj->cache_level : 0,
- was_bound, NULL));
+ GEM_BUG_ON(!was_bound);
+ vma->ops->bind_vma(&ggtt->vm, NULL, vma,
+ obj ? obj->cache_level : 0,
+ was_bound);
if (obj) { /* only used during resume => exclusive access */
flush |= fetch_and_zero(&obj->write_domain);
obj->read_domains |= I915_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index e0755f1a904b..39b428c5049c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -356,7 +356,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
+ ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (ret)
goto err_unref;
@@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
/* We must be able to switch to something! */
GEM_BUG_ON(!engine->kernel_context);
- err = intel_renderstate_init(&so, engine);
- if (err)
- goto out;
-
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
}
- rq = intel_context_create_request(ce);
+ err = intel_renderstate_init(&so, ce);
+ if (err)
+ goto err;
+
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- intel_context_put(ce);
- goto out;
+ goto err_fini;
}
err = intel_engine_emit_ctx_wa(rq);
@@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
err_rq:
requests[id] = i915_request_get(rq);
i915_request_add(rq);
- intel_renderstate_fini(&so);
- if (err)
+err_fini:
+ intel_renderstate_fini(&so, ce);
+err:
+ if (err) {
+ intel_context_put(ce);
goto out;
+ }
}
/* Flush the default context image to memory, and enable powersaving. */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 418ae184cecf..4b7671ac5dca 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -35,39 +35,65 @@ static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
- kfree(node);
+ kfree_rcu(node, rcu);
}
-static void pool_free_work(struct work_struct *wrk)
+static bool pool_free_older_than(struct intel_gt_buffer_pool *pool, long keep)
{
- struct intel_gt_buffer_pool *pool =
- container_of(wrk, typeof(*pool), work.work);
- struct intel_gt_buffer_pool_node *node, *next;
- unsigned long old = jiffies - HZ;
+ struct intel_gt_buffer_pool_node *node, *stale = NULL;
bool active = false;
- LIST_HEAD(stale);
int n;
/* Free buffers that have not been used in the past second */
- spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
struct list_head *list = &pool->cache_list[n];
- /* Most recent at head; oldest at tail */
- list_for_each_entry_safe_reverse(node, next, list, link) {
- if (time_before(node->age, old))
- break;
+ if (list_empty(list))
+ continue;
+
+ if (spin_trylock_irq(&pool->lock)) {
+ struct list_head *pos;
+
+ /* Most recent at head; oldest at tail */
+ list_for_each_prev(pos, list) {
+ unsigned long age;
+
+ node = list_entry(pos, typeof(*node), link);
+
+ age = READ_ONCE(node->age);
+ if (!age || jiffies - age < keep)
+ break;
+
+ /* Check we are the first to claim this node */
+ if (!xchg(&node->age, 0))
+ break;
- list_move(&node->link, &stale);
+ node->free = stale;
+ stale = node;
+ }
+ if (!list_is_last(pos, list))
+ __list_del_many(pos, list);
+
+ spin_unlock_irq(&pool->lock);
}
+
active |= !list_empty(list);
}
- spin_unlock_irq(&pool->lock);
- list_for_each_entry_safe(node, next, &stale, link)
+ while ((node = stale)) {
+ stale = stale->free;
node_free(node);
+ }
+
+ return active;
+}
+
+static void pool_free_work(struct work_struct *wrk)
+{
+ struct intel_gt_buffer_pool *pool =
+ container_of(wrk, typeof(*pool), work.work);
- if (active)
+ if (pool_free_older_than(pool, HZ))
schedule_delayed_work(&pool->work,
round_jiffies_up_relative(HZ));
}
@@ -109,8 +135,8 @@ static void pool_retire(struct i915_active *ref)
i915_gem_object_make_purgeable(node->obj);
spin_lock_irqsave(&pool->lock, flags);
- node->age = jiffies;
- list_add(&node->link, list);
+ list_add_rcu(&node->link, list);
+ WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
spin_unlock_irqrestore(&pool->lock, flags);
schedule_delayed_work(&pool->work,
@@ -151,20 +177,30 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
struct intel_gt_buffer_pool_node *node;
struct list_head *list;
- unsigned long flags;
int ret;
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
- spin_lock_irqsave(&pool->lock, flags);
- list_for_each_entry(node, list, link) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, list, link) {
+ unsigned long age;
+
if (node->obj->base.size < size)
continue;
- list_del(&node->link);
- break;
+
+ age = READ_ONCE(node->age);
+ if (!age)
+ continue;
+
+ if (cmpxchg(&node->age, age, 0) == age) {
+ spin_lock_irq(&pool->lock);
+ list_del_rcu(&node->link);
+ spin_unlock_irq(&pool->lock);
+ break;
+ }
}
- spin_unlock_irqrestore(&pool->lock, flags);
+ rcu_read_unlock();
if (&node->link == list) {
node = node_create(pool, size);
@@ -192,28 +228,13 @@ void intel_gt_init_buffer_pool(struct intel_gt *gt)
INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
-static void pool_free_imm(struct intel_gt_buffer_pool *pool)
-{
- int n;
-
- spin_lock_irq(&pool->lock);
- for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
- struct intel_gt_buffer_pool_node *node, *next;
- struct list_head *list = &pool->cache_list[n];
-
- list_for_each_entry_safe(node, next, list, link)
- node_free(node);
- INIT_LIST_HEAD(list);
- }
- spin_unlock_irq(&pool->lock);
-}
-
void intel_gt_flush_buffer_pool(struct intel_gt *gt)
{
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
do {
- pool_free_imm(pool);
+ while (pool_free_older_than(pool, 0))
+ ;
} while (cancel_delayed_work_sync(&pool->work));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e28bdda771ed..bcf1658c9633 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
- struct intel_gt_buffer_pool *pool;
+ union {
+ struct intel_gt_buffer_pool *pool;
+ struct intel_gt_buffer_pool_node *free;
+ struct rcu_head rcu;
+ };
unsigned long age;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index b05da68e52f4..257063a57101 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -8,6 +8,7 @@
#include "i915_drv.h"
#include "i915_irq.h"
+#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_uncore.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2a72cce63fd9..3f1114b58b01 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -11,160 +11,24 @@
#include "intel_gt.h"
#include "intel_gtt.h"
-void stash_init(struct pagestash *stash)
+struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
- pagevec_init(&stash->pvec);
- spin_lock_init(&stash->lock);
-}
-
-static struct page *stash_pop_page(struct pagestash *stash)
-{
- struct page *page = NULL;
-
- spin_lock(&stash->lock);
- if (likely(stash->pvec.nr))
- page = stash->pvec.pages[--stash->pvec.nr];
- spin_unlock(&stash->lock);
-
- return page;
-}
-
-static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
-{
- unsigned int nr;
-
- spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
-
- nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
- memcpy(stash->pvec.pages + stash->pvec.nr,
- pvec->pages + pvec->nr - nr,
- sizeof(pvec->pages[0]) * nr);
- stash->pvec.nr += nr;
-
- spin_unlock(&stash->lock);
-
- pvec->nr -= nr;
-}
-
-static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
-{
- struct pagevec stack;
- struct page *page;
-
if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
i915_gem_shrink_all(vm->i915);
- page = stash_pop_page(&vm->free_pages);
- if (page)
- return page;
-
- if (!vm->pt_kmap_wc)
- return alloc_page(gfp);
-
- /* Look in our global stash of WC pages... */
- page = stash_pop_page(&vm->i915->mm.wc_stash);
- if (page)
- return page;
-
- /*
- * Otherwise batch allocate pages to amortize cost of set_pages_wc.
- *
- * We have to be careful as page allocation may trigger the shrinker
- * (via direct reclaim) which will fill up the WC stash underneath us.
- * So we add our WB pages into a temporary pvec on the stack and merge
- * them into the WC stash after all the allocations are complete.
- */
- pagevec_init(&stack);
- do {
- struct page *page;
-
- page = alloc_page(gfp);
- if (unlikely(!page))
- break;
-
- stack.pages[stack.nr++] = page;
- } while (pagevec_space(&stack));
-
- if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
- page = stack.pages[--stack.nr];
-
- /* Merge spare WC pages to the global stash */
- if (stack.nr)
- stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
-
- /* Push any surplus WC pages onto the local VM stash */
- if (stack.nr)
- stash_push_pagevec(&vm->free_pages, &stack);
- }
-
- /* Return unwanted leftovers */
- if (unlikely(stack.nr)) {
- WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
- __pagevec_release(&stack);
- }
-
- return page;
+ return i915_gem_object_create_internal(vm->i915, sz);
}
-static void vm_free_pages_release(struct i915_address_space *vm,
- bool immediate)
+int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
- struct pagevec *pvec = &vm->free_pages.pvec;
- struct pagevec stack;
-
- lockdep_assert_held(&vm->free_pages.lock);
- GEM_BUG_ON(!pagevec_count(pvec));
-
- if (vm->pt_kmap_wc) {
- /*
- * When we use WC, first fill up the global stash and then
- * only if full immediately free the overflow.
- */
- stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
-
- /*
- * As we have made some room in the VM's free_pages,
- * we can wait for it to fill again. Unless we are
- * inside i915_address_space_fini() and must
- * immediately release the pages!
- */
- if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
- return;
+ int err;
- /*
- * We have to drop the lock to allow ourselves to sleep,
- * so take a copy of the pvec and clear the stash for
- * others to use it as we sleep.
- */
- stack = *pvec;
- pagevec_reinit(pvec);
- spin_unlock(&vm->free_pages.lock);
-
- pvec = &stack;
- set_pages_array_wb(pvec->pages, pvec->nr);
-
- spin_lock(&vm->free_pages.lock);
- }
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ return err;
- __pagevec_release(pvec);
-}
-
-static void vm_free_page(struct i915_address_space *vm, struct page *page)
-{
- /*
- * On !llc, we need to change the pages back to WB. We only do so
- * in bulk, so we rarely need to change the page attributes here,
- * but doing so requires a stop_machine() from deep inside arch/x86/mm.
- * To make detection of the possible sleep more likely, use an
- * unconditional might_sleep() for everybody.
- */
- might_sleep();
- spin_lock(&vm->free_pages.lock);
- while (!pagevec_space(&vm->free_pages.pvec))
- vm_free_pages_release(vm, false);
- GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
- pagevec_add(&vm->free_pages.pvec, page);
- spin_unlock(&vm->free_pages.lock);
+ i915_gem_object_make_unshrinkable(obj);
+ return 0;
}
void __i915_vm_close(struct i915_address_space *vm)
@@ -194,14 +58,7 @@ void __i915_vm_close(struct i915_address_space *vm)
void i915_address_space_fini(struct i915_address_space *vm)
{
- spin_lock(&vm->free_pages.lock);
- if (pagevec_count(&vm->free_pages.pvec))
- vm_free_pages_release(vm, true);
- GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
- spin_unlock(&vm->free_pages.lock);
-
drm_mm_takedown(&vm->mm);
-
mutex_destroy(&vm->mutex);
}
@@ -246,8 +103,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
drm_mm_init(&vm->mm, 0, vm->total);
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
- stash_init(&vm->free_pages);
-
INIT_LIST_HEAD(&vm->bound_list);
}
@@ -264,64 +119,50 @@ void clear_pages(struct i915_vma *vma)
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}
-static int __setup_page_dma(struct i915_address_space *vm,
- struct i915_page_dma *p,
- gfp_t gfp)
-{
- p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
- if (unlikely(!p->page))
- return -ENOMEM;
-
- p->daddr = dma_map_page_attrs(vm->dma,
- p->page, 0, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_NO_WARN);
- if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
- vm_free_page(vm, p->page);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
- return __setup_page_dma(vm, p, __GFP_HIGHMEM);
+ GEM_BUG_ON(!i915_gem_object_has_pages(p));
+ return sg_dma_address(p->mm.pages->sgl);
}
-void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+struct page *__px_page(struct drm_i915_gem_object *p)
{
- dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- vm_free_page(vm, p->page);
+ GEM_BUG_ON(!i915_gem_object_has_pages(p));
+ return sg_page(p->mm.pages->sgl);
}
void
-fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
+fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
- kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
+ struct page *page = __px_page(p);
+ void *vaddr;
+
+ vaddr = kmap(page);
+ memset64(vaddr, val, count);
+ clflush_cache_range(vaddr, PAGE_SIZE);
+ kunmap(page);
}
-static void poison_scratch_page(struct page *page, unsigned long size)
+static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
- if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- return;
+ struct sgt_iter sgt;
+ struct page *page;
+ u8 val;
- GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+ val = 0;
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ val = POISON_FREE;
- do {
+ for_each_sgt_page(page, sgt, scratch->mm.pages) {
void *vaddr;
vaddr = kmap(page);
- memset(vaddr, POISON_FREE, PAGE_SIZE);
+ memset(vaddr, val, PAGE_SIZE);
kunmap(page);
-
- page = pfn_to_page(page_to_pfn(page) + 1);
- size -= PAGE_SIZE;
- } while (size);
+ }
}
-int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
+int setup_scratch_page(struct i915_address_space *vm)
{
unsigned long size;
@@ -338,21 +179,27 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
*/
size = I915_GTT_PAGE_SIZE_4K;
if (i915_vm_is_4lvl(vm) &&
- HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+ HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
size = I915_GTT_PAGE_SIZE_64K;
- gfp |= __GFP_NOWARN;
- }
- gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
do {
- unsigned int order = get_order(size);
- struct page *page;
- dma_addr_t addr;
+ struct drm_i915_gem_object *obj;
- page = alloc_pages(gfp, order);
- if (unlikely(!page))
+ obj = vm->alloc_pt_dma(vm, size);
+ if (IS_ERR(obj))
goto skip;
+ if (pin_pt_dma(vm, obj))
+ goto skip_obj;
+
+ /* We need a single contiguous page for our scratch */
+ if (obj->mm.page_sizes.sg < size)
+ goto skip_obj;
+
+ /* And it needs to be correspondingly aligned */
+ if (__px_dma(obj) & (size - 1))
+ goto skip_obj;
+
/*
* Use a non-zero scratch page for debugging.
*
@@ -362,61 +209,28 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
* should it ever be accidentally used, the effect should be
* fairly benign.
*/
- poison_scratch_page(page, size);
-
- addr = dma_map_page_attrs(vm->dma,
- page, 0, size,
- PCI_DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_NO_WARN);
- if (unlikely(dma_mapping_error(vm->dma, addr)))
- goto free_page;
-
- if (unlikely(!IS_ALIGNED(addr, size)))
- goto unmap_page;
-
- vm->scratch[0].base.page = page;
- vm->scratch[0].base.daddr = addr;
- vm->scratch_order = order;
+ poison_scratch_page(obj);
+
+ vm->scratch[0] = obj;
+ vm->scratch_order = get_order(size);
return 0;
-unmap_page:
- dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
-free_page:
- __free_pages(page, order);
+skip_obj:
+ i915_gem_object_put(obj);
skip:
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
size = I915_GTT_PAGE_SIZE_4K;
- gfp &= ~__GFP_NOWARN;
} while (1);
}
-void cleanup_scratch_page(struct i915_address_space *vm)
-{
- struct i915_page_dma *p = px_base(&vm->scratch[0]);
- unsigned int order = vm->scratch_order;
-
- dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
- PCI_DMA_BIDIRECTIONAL);
- __free_pages(p->page, order);
-}
-
void free_scratch(struct i915_address_space *vm)
{
int i;
- if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
- return;
-
- for (i = 1; i <= vm->top; i++) {
- if (!px_dma(&vm->scratch[i]))
- break;
- cleanup_page_dma(vm, px_base(&vm->scratch[i]));
- }
-
- cleanup_scratch_page(vm);
+ for (i = 0; i <= vm->top; i++)
+ i915_gem_object_put(vm->scratch[i]);
}
void gtt_write_workarounds(struct intel_gt *gt)
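
With the pagevec/WC stash gone, scratch pages are ordinary internal GEM objects and their PTE encoding hangs off the object itself. A minimal sketch of the setup a vm owner performs, mirroring ggtt_probe_common() earlier in this patch (example_init_scratch() is a hypothetical name; it assumes vm->alloc_pt_dma and vm->pte_encode are already installed):

static int example_init_scratch(struct i915_address_space *vm)
{
	int err;

	/* Allocates and pins vm->scratch[0] via vm->alloc_pt_dma() */
	err = setup_scratch_page(vm);
	if (err)
		return err;

	/* Cache the scratch PTE on the backing object for clear_range() */
	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]), I915_CACHE_NONE, 0);

	/* Teardown is now a single free_scratch(vm), which puts the objects */
	return 0;
}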
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index f2b75078e05f..c13c650ced22 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -134,38 +134,29 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
+enum i915_cache_level;
+
+struct drm_i915_file_private;
+struct drm_i915_gem_object;
struct i915_fence_reg;
+struct i915_vma;
+struct intel_gt;
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
-struct i915_page_dma {
- struct page *page;
+struct i915_page_table {
+ struct drm_i915_gem_object *base;
union {
- dma_addr_t daddr;
-
- /*
- * For gen6/gen7 only. This is the offset in the GGTT
- * where the page directory entries for PPGTT begin
- */
- u32 ggtt_offset;
+ atomic_t used;
+ struct i915_page_table *stash;
};
};
-struct i915_page_scratch {
- struct i915_page_dma base;
- u64 encode;
-};
-
-struct i915_page_table {
- struct i915_page_dma base;
- atomic_t used;
-};
-
struct i915_page_directory {
struct i915_page_table pt;
spinlock_t lock;
- void *entry[512];
+ void **entry;
};
#define __px_choose_expr(x, type, expr, other) \
@@ -176,12 +167,14 @@ struct i915_page_directory {
other)
#define px_base(px) \
- __px_choose_expr(px, struct i915_page_dma *, __x, \
- __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
- __px_choose_expr(px, struct i915_page_table *, &__x->base, \
- __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
- (void)0))))
-#define px_dma(px) (px_base(px)->daddr)
+ __px_choose_expr(px, struct drm_i915_gem_object *, __x, \
+ __px_choose_expr(px, struct i915_page_table *, __x->base, \
+ __px_choose_expr(px, struct i915_page_directory *, __x->pt.base, \
+ (void)0)))
+
+struct page *__px_page(struct drm_i915_gem_object *p);
+dma_addr_t __px_dma(struct drm_i915_gem_object *p);
+#define px_dma(px) (__px_dma(px_base(px)))
#define px_pt(px) \
__px_choose_expr(px, struct i915_page_table *, __x, \
@@ -189,19 +182,18 @@ struct i915_page_directory {
(void)0))
#define px_used(px) (&px_pt(px)->used)
-enum i915_cache_level;
-
-struct drm_i915_file_private;
-struct drm_i915_gem_object;
-struct i915_vma;
-struct intel_gt;
+struct i915_vm_pt_stash {
+ /* preallocated chains of page tables/directories */
+ struct i915_page_table *pt[2];
+};
struct i915_vma_ops {
/* Map an object into an address space with the given cache flags. */
- int (*bind_vma)(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags);
+ void (*bind_vma)(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
/*
* Unmap an object from an address space. This usually consists of
* setting the valid PTE entries to a reserved scratch page.
@@ -213,13 +205,6 @@ struct i915_vma_ops {
void (*clear_pages)(struct i915_vma *vma);
};
-struct pagestash {
- spinlock_t lock;
- struct pagevec pvec;
-};
-
-void stash_init(struct pagestash *stash);
-
struct i915_address_space {
struct kref ref;
struct rcu_work rcu;
@@ -256,33 +241,33 @@ struct i915_address_space {
#define VM_CLASS_GGTT 0
#define VM_CLASS_PPGTT 1
- struct i915_page_scratch scratch[4];
- unsigned int scratch_order;
- unsigned int top;
-
+ struct drm_i915_gem_object *scratch[4];
/**
* List of vma currently bound.
*/
struct list_head bound_list;
- struct pagestash free_pages;
-
/* Global GTT */
bool is_ggtt:1;
- /* Some systems require uncached updates of the page directories */
- bool pt_kmap_wc:1;
-
/* Some systems support read-only mappings for GGTT and/or PPGTT */
bool has_read_only:1;
+ u8 top;
+ u8 pd_shift;
+ u8 scratch_order;
+
+ struct drm_i915_gem_object *
+ (*alloc_pt_dma)(struct i915_address_space *vm, int sz);
+
u64 (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
u32 flags); /* Create a valid PTE */
#define PTE_READ_ONLY BIT(0)
- int (*allocate_va_range)(struct i915_address_space *vm,
- u64 start, u64 length);
+ void (*allocate_va_range)(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
@@ -490,9 +475,9 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
{
- struct i915_page_dma *pt = ppgtt->pd->entry[n];
+ struct i915_page_table *pt = ppgtt->pd->entry[n];
- return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
+ return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
}
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
@@ -517,13 +502,10 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
-int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
-void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
-
-#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px)))
void
-fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
+fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
#define fill32_px(px, v) do { \
@@ -531,47 +513,51 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
fill_px((px), v__ << 32 | v__); \
} while (0)
-int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
-void cleanup_scratch_page(struct i915_address_space *vm);
+int setup_scratch_page(struct i915_address_space *vm);
void free_scratch(struct i915_address_space *vm);
+struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
struct i915_page_table *alloc_pt(struct i915_address_space *vm);
struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
-struct i915_page_directory *__alloc_pd(size_t sz);
+struct i915_page_directory *__alloc_pd(int npde);
-void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
+int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
-#define free_px(vm, px) free_pd(vm, px_base(px))
+void free_px(struct i915_address_space *vm,
+ struct i915_page_table *pt, int lvl);
+#define free_pt(vm, px) free_px(vm, px, 0)
+#define free_pd(vm, px) free_px(vm, px_pt(px), 1)
void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- struct i915_page_dma * const to,
+ struct i915_page_table *pt,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
#define set_pd_entry(pd, idx, to) \
- __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+ __set_pd_entry((pd), (idx), px_pt(to), gen8_pde_encode)
void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- const struct i915_page_scratch * const scratch);
+ const struct drm_i915_gem_object * const scratch);
bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
- const struct i915_page_scratch * const scratch);
+ const struct drm_i915_gem_object * const scratch);
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
int ggtt_set_pages(struct i915_vma *vma);
int ppgtt_set_pages(struct i915_vma *vma);
void clear_pages(struct i915_vma *vma);
-int ppgtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags);
+void ppgtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma *vma);
@@ -579,6 +565,14 @@ void gtt_write_workarounds(struct intel_gt *gt);
void setup_private_pat(struct intel_uncore *uncore);
+int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 size);
+int i915_vm_pin_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash);
+void i915_vm_free_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash);
+
static inline struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;
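
Page-table descriptors are now backed by GEM objects, so allocation and DMA mapping are two separate steps (alloc_pt() vs pin_pt_dma()). A minimal sketch of the lifecycle of a single page table under the new helpers (example_pt_cycle() is a hypothetical caller):

static int example_pt_cycle(struct i915_address_space *vm)
{
	struct i915_page_table *pt;
	dma_addr_t daddr;
	int err;

	pt = alloc_pt(vm);		/* backing object from vm->alloc_pt_dma() */
	if (IS_ERR(pt))
		return PTR_ERR(pt);

	err = pin_pt_dma(vm, pt->base);	/* pin pages, resolve the DMA address */
	if (err) {
		free_pt(vm, pt);
		return err;
	}

	daddr = px_dma(pt);	/* the address a PDE written by set_pd_entry() holds */
	(void)daddr;

	free_pt(vm, pt);	/* puts pt->base and frees the descriptor */
	return 0;
}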
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d03da2f64a49..0412a44f25f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -137,6 +137,7 @@
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
@@ -1148,20 +1149,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
} else {
struct intel_engine_cs *owner = rq->context->engine;
- /*
- * Decouple the virtual breadcrumb before moving it
- * back to the virtual engine -- we don't want the
- * request to complete in the background and try
- * and cancel the breadcrumb on the virtual engine
- * (instead of the old engine where it is linked)!
- */
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &rq->fence.flags)) {
- spin_lock_nested(&rq->lock,
- SINGLE_DEPTH_NESTING);
- i915_request_cancel_breadcrumb(rq);
- spin_unlock(&rq->lock);
- }
WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
@@ -1819,16 +1806,31 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
+static void virtual_xfer_context(struct virtual_engine *ve,
+ struct intel_engine_cs *engine)
{
+ unsigned int n;
+
+ if (likely(engine == ve->siblings[0]))
+ return;
+
+ GEM_BUG_ON(READ_ONCE(ve->context.inflight));
+ if (!intel_engine_has_relative_mmio(engine))
+ virtual_update_register_offsets(ve->context.lrc_reg_state,
+ engine);
+
/*
- * All the outstanding signals on ve->siblings[0] must have
- * been completed, just pending the interrupt handler. As those
- * signals still refer to the old sibling (via rq->engine), we must
- * transfer those to the old irq_worker to keep our locking
- * consistent.
+ * Move the bound engine to the top of the list for
+ * future execution. We then kick this tasklet first
+ * before checking others, so that we preferentially
+ * reuse this set of bound registers.
*/
- intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
+ for (n = 1; n < ve->num_siblings; n++) {
+ if (ve->siblings[n] == engine) {
+ swap(ve->siblings[n], ve->siblings[0]);
+ break;
+ }
+ }
}
#define for_each_waiter(p__, rq__) \
@@ -2060,6 +2062,14 @@ static inline void clear_ports(struct i915_request **ports, int count)
memset_p((void **)ports, NULL, count);
}
+static inline void
+copy_ports(struct i915_request **dst, struct i915_request **src, int count)
+{
+ /* A memcpy_p() would be very useful here! */
+ while (count--)
+ WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2271,38 +2281,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
WRITE_ONCE(rq->engine, engine);
- if (engine != ve->siblings[0]) {
- u32 *regs = ve->context.lrc_reg_state;
- unsigned int n;
-
- GEM_BUG_ON(READ_ONCE(ve->context.inflight));
-
- if (!intel_engine_has_relative_mmio(engine))
- virtual_update_register_offsets(regs,
- engine);
-
- if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve);
-
+ if (__i915_request_submit(rq)) {
/*
- * Move the bound engine to the top of the list
- * for future execution. We then kick this
- * tasklet first before checking others, so that
- * we preferentially reuse this set of bound
- * registers.
+ * Only after we confirm that we will submit
+ * this request (i.e. it has not already
+ * completed), do we want to update the context.
+ *
+ * This serves two purposes. It avoids
+ * unnecessary work if we are resubmitting an
+ * already completed request after timeslicing.
+ * But more importantly, it prevents us altering
+ * ve->siblings[] on an idle context, where
+ * we may be using ve->siblings[] in
+ * virtual_context_enter / virtual_context_exit.
*/
- for (n = 1; n < ve->num_siblings; n++) {
- if (ve->siblings[n] == engine) {
- swap(ve->siblings[n],
- ve->siblings[0]);
- break;
- }
- }
-
+ virtual_xfer_context(ve, engine);
GEM_BUG_ON(ve->siblings[0] != engine);
- }
- if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
@@ -2648,10 +2643,9 @@ static void process_csb(struct intel_engine_cs *engine)
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending));
+ copy_ports(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists));
smp_wmb(); /* complete the seqlock */
WRITE_ONCE(execlists->active, execlists->inflight);
@@ -3309,7 +3303,10 @@ static void execlists_context_unpin(struct intel_context *ce)
{
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
+}
+static void execlists_context_post_unpin(struct intel_context *ce)
+{
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -3471,20 +3468,24 @@ __execlists_update_reg_state(const struct intel_context *ce,
}
static int
-__execlists_context_pin(struct intel_context *ce,
- struct intel_engine_cs *engine)
+execlists_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww, void **vaddr)
{
- void *vaddr;
-
GEM_BUG_ON(!ce->state);
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
- vaddr = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(engine->i915) |
+ *vaddr = i915_gem_object_pin_map(ce->state->obj,
+ i915_coherent_map_type(ce->engine->i915) |
I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
+ return PTR_ERR_OR_ZERO(*vaddr);
+}
+
+static int
+__execlists_context_pin(struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ void *vaddr)
+{
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3492,9 +3493,9 @@ __execlists_context_pin(struct intel_context *ce,
return 0;
}
-static int execlists_context_pin(struct intel_context *ce)
+static int execlists_context_pin(struct intel_context *ce, void *vaddr)
{
- return __execlists_context_pin(ce, ce->engine);
+ return __execlists_context_pin(ce, ce->engine, vaddr);
}
static int execlists_context_alloc(struct intel_context *ce)
@@ -3520,8 +3521,10 @@ static void execlists_context_reset(struct intel_context *ce)
static const struct intel_context_ops execlists_context_ops = {
.alloc = execlists_context_alloc,
+ .pre_pin = execlists_context_pre_pin,
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
+ .post_unpin = execlists_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@@ -3885,7 +3888,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_ggtt_pin(vma, 0, PIN_HIGH);
+ err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err)
goto err;
@@ -4126,7 +4129,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
{
intel_mocs_init_engine(engine);
- intel_engine_reset_breadcrumbs(engine);
+ intel_breadcrumbs_reset(engine->breadcrumbs);
if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
struct drm_printer p = drm_debug_printer(__func__);
@@ -4757,14 +4760,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;
+ cmd = 4;
+ if (mode & EMIT_INVALIDATE)
+ cmd += 2;
if (mode & EMIT_INVALIDATE)
aux_inv = request->engine->mask & ~BIT(BCS0);
+ if (aux_inv)
+ cmd += 2 * hweight8(aux_inv) + 2;
- cs = intel_ring_begin(request,
- 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
+ cs = intel_ring_begin(request, cmd);
if (IS_ERR(cs))
return PTR_ERR(cs);
+ if (mode & EMIT_INVALIDATE)
+ *cs++ = preparser_disable(true);
+
cmd = MI_FLUSH_DW + 1;
/* We always require a command barrier so that subsequent
@@ -4797,6 +4807,10 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = MI_NOOP;
}
+
+ if (mode & EMIT_INVALIDATE)
+ *cs++ = preparser_disable(false);
+
intel_ring_advance(request, cs);
return 0;
@@ -5295,6 +5309,14 @@ populate_lr_context(struct intel_context *ce,
return 0;
}
+static struct intel_timeline *pinned_timeline(struct intel_context *ce)
+{
+ struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
+
+ return intel_timeline_create_from_engine(ce->engine,
+ page_unmask_bits(tl));
+}
+
static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@@ -5325,19 +5347,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
goto error_deref_obj;
}
- if (!ce->timeline) {
+ if (!page_mask_bits(ce->timeline)) {
struct intel_timeline *tl;
- struct i915_vma *hwsp;
/*
* Use the static global HWSP for the kernel context, and
* a dynamically allocated cacheline for everyone else.
*/
- hwsp = NULL;
- if (unlikely(intel_context_is_barrier(ce)))
- hwsp = engine->status_page.vma;
-
- tl = intel_timeline_create(engine->gt, hwsp);
+ if (unlikely(ce->timeline))
+ tl = pinned_timeline(ce);
+ else
+ tl = intel_timeline_create(engine->gt);
if (IS_ERR(tl)) {
ret = PTR_ERR(tl);
goto error_deref_obj;
@@ -5443,12 +5463,12 @@ static int virtual_context_alloc(struct intel_context *ce)
return __execlists_context_alloc(ce, ve->siblings[0]);
}
-static int virtual_context_pin(struct intel_context *ce)
+static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
/* Note: we must use a real engine class for setting up reg state */
- return __execlists_context_pin(ce, ve->siblings[0]);
+ return __execlists_context_pin(ce, ve->siblings[0], vaddr);
}
static void virtual_context_enter(struct intel_context *ce)
@@ -5476,8 +5496,10 @@ static void virtual_context_exit(struct intel_context *ce)
static const struct intel_context_ops virtual_context_ops = {
.alloc = virtual_context_alloc,
+ .pre_pin = execlists_context_pre_pin,
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
+ .post_unpin = execlists_context_post_unpin,
.enter = virtual_context_enter,
.exit = virtual_context_exit,
@@ -5711,9 +5733,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
- intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
- ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
@@ -5730,6 +5750,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
intel_context_init(&ve->context, &ve->base);
+ ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
+ if (!ve->base.breadcrumbs) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
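
The context pinning split above separates the sleeping/allocating part (pre_pin/post_unpin) from the part that only writes register state (pin/unpin), so the former can run under the ww transaction. A rough sketch of the call order the new ops imply; the real dispatcher is intel_context_pin_ww() elsewhere in this series, and the callback signatures here are inferred from the execlists implementations above:

static int example_pin_context(struct intel_context *ce,
			       struct i915_gem_ww_ctx *ww)
{
	void *vaddr;
	int err;

	/* May allocate and map objects; runs under the caller's ww context */
	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		return err;

	/* Only fills in lrc/ring state from the mapping; must not allocate */
	err = ce->ops->pin(ce, vaddr);
	if (err)
		ce->ops->post_unpin(ce);	/* undo the pre_pin mapping */

	return err;
}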
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index f0862e924d11..46d9aceda64c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -18,7 +18,8 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
if (unlikely(!pt))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_page_dma(vm, &pt->base))) {
+ pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(pt->base)) {
kfree(pt);
return ERR_PTR(-ENOMEM);
}
@@ -27,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return pt;
}
-struct i915_page_directory *__alloc_pd(size_t sz)
+struct i915_page_directory *__alloc_pd(int count)
{
struct i915_page_directory *pd;
- pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+ pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd))
return NULL;
+ pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pd->entry)) {
+ kfree(pd);
+ return NULL;
+ }
+
spin_lock_init(&pd->lock);
return pd;
}
@@ -43,11 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
- pd = __alloc_pd(sizeof(*pd));
+ pd = __alloc_pd(I915_PDES);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(pd->pt.base)) {
+ kfree(pd->entry);
kfree(pd);
return ERR_PTR(-ENOMEM);
}
@@ -55,41 +64,52 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return pd;
}
-void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
+void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
{
- cleanup_page_dma(vm, pd);
- kfree(pd);
+ BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));
+
+ if (lvl) {
+ struct i915_page_directory *pd =
+ container_of(pt, typeof(*pd), pt);
+ kfree(pd->entry);
+ }
+
+ if (pt->base)
+ i915_gem_object_put(pt->base);
+
+ kfree(pt);
}
static inline void
-write_dma_entry(struct i915_page_dma * const pdma,
+write_dma_entry(struct drm_i915_gem_object * const pdma,
const unsigned short idx,
const u64 encoded_entry)
{
- u64 * const vaddr = kmap_atomic(pdma->page);
+ u64 * const vaddr = kmap_atomic(__px_page(pdma));
vaddr[idx] = encoded_entry;
+ clflush_cache_range(&vaddr[idx], sizeof(u64));
kunmap_atomic(vaddr);
}
void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- struct i915_page_dma * const to,
+ struct i915_page_table * const to,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
/* Each thread pre-pins the pd, and we may have a thread per pde. */
- GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+ GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);
atomic_inc(px_used(pd));
pd->entry[idx] = to;
- write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
+ write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC));
}
void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- const struct i915_page_scratch * const scratch)
+ const struct drm_i915_gem_object * const scratch)
{
GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
@@ -102,7 +122,7 @@ bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
- const struct i915_page_scratch * const scratch)
+ const struct drm_i915_gem_object * const scratch)
{
bool free = false;
@@ -155,19 +175,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
return ppgtt;
}
-int ppgtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+void ppgtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
u32 pte_flags;
- int err;
if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
- err = vm->allocate_va_range(vm, vma->node.start, vma->size);
- if (err)
- return err;
-
+ vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
}
@@ -178,8 +195,6 @@ int ppgtt_bind_vma(struct i915_address_space *vm,
vm->insert_entries(vm, vma, cache_level, pte_flags);
wmb();
-
- return 0;
}
void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@@ -188,12 +203,93 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
vm->clear_range(vm, vma->node.start, vma->size);
}
+static unsigned long pd_count(u64 size, int shift)
+{
+ /* Beware later misalignment */
+ return (size + 2 * (BIT_ULL(shift) - 1)) >> shift;
+}
+
+int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 size)
+{
+ unsigned long count;
+ int shift, n;
+
+ shift = vm->pd_shift;
+ if (!shift)
+ return 0;
+
+ count = pd_count(size, shift);
+ while (count--) {
+ struct i915_page_table *pt;
+
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ i915_vm_free_pt_stash(vm, stash);
+ return PTR_ERR(pt);
+ }
+
+ pt->stash = stash->pt[0];
+ stash->pt[0] = pt;
+ }
+
+ for (n = 1; n < vm->top; n++) {
+ shift += ilog2(I915_PDES); /* Each PD holds 512 entries */
+ count = pd_count(size, shift);
+ while (count--) {
+ struct i915_page_directory *pd;
+
+ pd = alloc_pd(vm);
+ if (IS_ERR(pd)) {
+ i915_vm_free_pt_stash(vm, stash);
+ return PTR_ERR(pd);
+ }
+
+ pd->pt.stash = stash->pt[1];
+ stash->pt[1] = &pd->pt;
+ }
+ }
+
+ return 0;
+}
+
+int i915_vm_pin_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash)
+{
+ struct i915_page_table *pt;
+ int n, err;
+
+ for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
+ for (pt = stash->pt[n]; pt; pt = pt->stash) {
+ err = pin_pt_dma(vm, pt->base);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void i915_vm_free_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash)
+{
+ struct i915_page_table *pt;
+ int n;
+
+ for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
+ while ((pt = stash->pt[n])) {
+ stash->pt[n] = pt->stash;
+ free_px(vm, pt, n);
+ }
+ }
+}
+
int ppgtt_set_pages(struct i915_vma *vma)
{
GEM_BUG_ON(vma->pages);
vma->pages = vma->obj->mm.pages;
-
vma->page_sizes = vma->obj->mm.page_sizes;
return 0;
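
The stash turns va-range allocation into an infallible consumer of preallocated page tables: callers size and pin the stash up front, then the allocate/bind step simply pops from it. A minimal sketch of the lifecycle, following init_aliasing_ppgtt() earlier in this patch (example_prealloc_range() is a hypothetical name):

static int example_prealloc_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_vm_pt_stash stash = {};
	int err;

	/* Worst-case number of PTs/PDs for @length, chained onto the stash */
	err = i915_vm_alloc_pt_stash(vm, &stash, length);
	if (err)
		return err;

	/* Pin the backing objects; this is the only step that can still fail */
	err = i915_vm_pin_pt_stash(vm, &stash);
	if (err)
		goto out;

	/* Consumes entries from the stash instead of allocating inline */
	vm->allocate_va_range(vm, &stash, start, length);

out:
	i915_vm_free_pt_stash(vm, &stash);	/* frees whatever was not consumed */
	return err;
}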
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 1bfad589c63b..ea2a77c7b469 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
+#include "gt/intel_context.h"
#include "intel_ring.h"
static const struct intel_renderstate_rodata *
@@ -157,33 +158,47 @@ out:
#undef OUT_BATCH
int intel_renderstate_init(struct intel_renderstate *so,
- struct intel_engine_cs *engine)
+ struct intel_context *ce)
{
- struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine = ce->engine;
+ struct drm_i915_gem_object *obj = NULL;
int err;
memset(so, 0, sizeof(*so));
so->rodata = render_state_get_rodata(engine);
- if (!so->rodata)
- return 0;
+ if (so->rodata) {
+ if (so->rodata->batch_items * 4 > PAGE_SIZE)
+ return -EINVAL;
+
+ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(so->vma)) {
+ err = PTR_ERR(so->vma);
+ goto err_obj;
+ }
+ }
- if (so->rodata->batch_items * 4 > PAGE_SIZE)
- return -EINVAL;
+ i915_gem_ww_ctx_init(&so->ww, true);
+retry:
+ err = intel_context_pin_ww(ce, &so->ww);
+ if (err)
+ goto err_fini;
- obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ /* return early if there's nothing to setup */
+ if (!err && !so->rodata)
+ return 0;
- so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
- if (IS_ERR(so->vma)) {
- err = PTR_ERR(so->vma);
- goto err_obj;
- }
+ err = i915_gem_object_lock(so->vma->obj, &so->ww);
+ if (err)
+ goto err_context;
err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
- goto err_obj;
+ goto err_context;
err = render_state_setup(so, engine->i915);
if (err)
@@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so,
err_unpin:
i915_vma_unpin(so->vma);
+err_context:
+ intel_context_unpin(ce);
+err_fini:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&so->ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&so->ww);
err_obj:
- i915_gem_object_put(obj);
+ if (obj)
+ i915_gem_object_put(obj);
so->vma = NULL;
return err;
}
@@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
- i915_vma_lock(so->vma);
err = i915_request_await_object(rq, so->vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(so->vma, rq, 0);
- i915_vma_unlock(so->vma);
if (err)
return err;
@@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return 0;
}
-void intel_renderstate_fini(struct intel_renderstate *so)
+void intel_renderstate_fini(struct intel_renderstate *so,
+ struct intel_context *ce)
{
- i915_vma_unpin_and_release(&so->vma, 0);
+ if (so->vma) {
+ i915_vma_unpin(so->vma);
+ i915_vma_close(so->vma);
+ }
+
+ intel_context_unpin(ce);
+ i915_gem_ww_ctx_fini(&so->ww);
+
+ if (so->vma)
+ i915_gem_object_put(so->vma->obj);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
index 5700be69a05a..713aa1e86c80 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -25,9 +25,10 @@
#define _INTEL_RENDERSTATE_H_
#include <linux/types.h>
+#include "i915_gem.h"
struct i915_request;
-struct intel_engine_cs;
+struct intel_context;
struct i915_vma;
struct intel_renderstate_rodata {
@@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state;
extern const struct intel_renderstate_rodata gen9_null_state;
struct intel_renderstate {
+ struct i915_gem_ww_ctx ww;
const struct intel_renderstate_rodata *rodata;
struct i915_vma *vma;
u32 batch_offset;
@@ -58,9 +60,10 @@ struct intel_renderstate {
};
int intel_renderstate_init(struct intel_renderstate *so,
- struct intel_engine_cs *engine);
+ struct intel_context *ce);
int intel_renderstate_emit(struct intel_renderstate *so,
struct i915_request *rq);
-void intel_renderstate_fini(struct intel_renderstate *so);
+void intel_renderstate_fini(struct intel_renderstate *so,
+ struct intel_context *ce);
#endif /* _INTEL_RENDERSTATE_H_ */
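
intel_renderstate_init() above is a template for the ww acquire/backoff loop used throughout this series: on -EDEADLK every held object lock is dropped and the whole sequence is retried in a consistent order. A stripped-down sketch of the pattern (example_ww_transaction() is a hypothetical name; the second argument to i915_gem_ww_ctx_init() selects interruptible waits, as used above):

static int example_ww_transaction(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		goto out;

	/* ... pin, map and populate the object under the ww lock here ... */

out:
	if (err == -EDEADLK) {
		/* Another thread owns a lock we need: drop ours, then retry */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}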
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 46a5ceffc22f..ac36b67fb46b 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -15,6 +15,7 @@
#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
+#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index bdb324167ef3..4034a4bac7f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
return space;
}
-int intel_ring_pin(struct intel_ring *ring)
+void __intel_ring_pin(struct intel_ring *ring)
+{
+ GEM_BUG_ON(!atomic_read(&ring->pin_count));
+ atomic_inc(&ring->pin_count);
+}
+
+int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
{
struct i915_vma *vma = ring->vma;
unsigned int flags;
@@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring)
else
flags |= PIN_HIGH;
- ret = i915_ggtt_pin(vma, 0, flags);
+ ret = i915_ggtt_pin(vma, ww, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index cc0ebca65167..1700579bdc93 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
-int intel_ring_pin(struct intel_ring *ring);
+void __intel_ring_pin(struct intel_ring *ring);
+int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 898593ca4889..16b48e72c369 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -32,6 +32,7 @@
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_reset.h"
@@ -201,16 +202,18 @@ static struct i915_address_space *vm_alias(struct i915_address_space *vm)
return vm;
}
+static u32 pp_dir(struct i915_address_space *vm)
+{
+ return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
+}
+
static void set_pp_dir(struct intel_engine_cs *engine)
{
struct i915_address_space *vm = vm_alias(engine->gt->vm);
if (vm) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
- ENGINE_WRITE(engine, RING_PP_DIR_BASE,
- px_base(ppgtt->pd)->ggtt_offset << 10);
+ ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm));
}
}
@@ -255,7 +258,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
else
ring_setup_status_page(engine);
- intel_engine_reset_breadcrumbs(engine);
+ intel_breadcrumbs_reset(engine->breadcrumbs);
/* Enforce ordering by reading HEAD register back */
ENGINE_POSTING_READ(engine, RING_HEAD);
@@ -474,14 +477,16 @@ static void ring_context_destroy(struct kref *ref)
intel_context_free(ce);
}
-static int __context_pin_ppgtt(struct intel_context *ce)
+static int ring_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
+ void **unused)
{
struct i915_address_space *vm;
int err = 0;
vm = vm_alias(ce->vm);
if (vm)
- err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
+ err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
return err;
}
@@ -497,6 +502,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
static void ring_context_unpin(struct intel_context *ce)
{
+}
+
+static void ring_context_post_unpin(struct intel_context *ce)
+{
__context_unpin_ppgtt(ce);
}
@@ -584,9 +593,9 @@ static int ring_context_alloc(struct intel_context *ce)
return 0;
}
-static int ring_context_pin(struct intel_context *ce)
+static int ring_context_pin(struct intel_context *ce, void *unused)
{
- return __context_pin_ppgtt(ce);
+ return 0;
}
static void ring_context_reset(struct intel_context *ce)
@@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce)
static const struct intel_context_ops ring_context_ops = {
.alloc = ring_context_alloc,
+ .pre_pin = ring_context_pre_pin,
.pin = ring_context_pin,
.unpin = ring_context_unpin,
+ .post_unpin = ring_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@@ -608,7 +619,7 @@ static const struct intel_context_ops ring_context_ops = {
};
static int load_pd_dir(struct i915_request *rq,
- const struct i915_ppgtt *ppgtt,
+ struct i915_address_space *vm,
u32 valid)
{
const struct intel_engine_cs * const engine = rq->engine;
@@ -624,7 +635,7 @@ static int load_pd_dir(struct i915_request *rq,
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+ *cs++ = pp_dir(vm);
/* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -826,7 +837,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
* post-sync op, this extra pass appears vital before a
* mm switch!
*/
- ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+ ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
if (ret)
return ret;
@@ -1250,14 +1261,15 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
return -ENODEV;
}
- timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
+ timeline = intel_timeline_create_from_engine(engine,
+ I915_GEM_HWS_SEQNO_ADDR);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
goto err;
}
GEM_BUG_ON(timeline->has_initial_breadcrumb);
- err = intel_timeline_pin(timeline);
+ err = intel_timeline_pin(timeline, NULL);
if (err)
goto err_timeline;
@@ -1267,7 +1279,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
goto err_timeline_unpin;
}
- err = intel_ring_pin(ring);
+ err = intel_ring_pin(ring, NULL);
if (err)
goto err_ring;
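
Ring and timeline pinning now thread an optional i915_gem_ww_ctx through to i915_ggtt_pin(); passing NULL, as intel_ring_submission_setup() does above, keeps the old standalone-pin behaviour. A small sketch of the combined pin step (example_pin_ring() is a hypothetical name; intel_timeline_unpin() is the existing counterpart used for unwind):

static int example_pin_ring(struct intel_ring *ring,
			    struct intel_timeline *tl,
			    struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_timeline_pin(tl, ww);	/* ww == NULL: pin outside a transaction */
	if (err)
		return err;

	err = intel_ring_pin(ring, ww);
	if (err)
		intel_timeline_unpin(tl);

	return err;
}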
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 97ba14ad52e4..e6a00eea0631 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -7,6 +7,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 46d20f5f3ddc..a2f74cefe4c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -215,7 +215,8 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
static int intel_timeline_init(struct intel_timeline *timeline,
struct intel_gt *gt,
- struct i915_vma *hwsp)
+ struct i915_vma *hwsp,
+ unsigned int offset)
{
void *vaddr;
@@ -246,8 +247,7 @@ static int intel_timeline_init(struct intel_timeline *timeline,
vaddr = page_mask_bits(cl->vaddr);
} else {
- timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
-
+ timeline->hwsp_offset = offset;
vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -297,7 +297,9 @@ static void intel_timeline_fini(struct intel_timeline *timeline)
}
struct intel_timeline *
-intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
+__intel_timeline_create(struct intel_gt *gt,
+ struct i915_vma *global_hwsp,
+ unsigned int offset)
{
struct intel_timeline *timeline;
int err;
@@ -306,7 +308,7 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
if (!timeline)
return ERR_PTR(-ENOMEM);
- err = intel_timeline_init(timeline, gt, global_hwsp);
+ err = intel_timeline_init(timeline, gt, global_hwsp, offset);
if (err) {
kfree(timeline);
return ERR_PTR(err);
@@ -315,14 +317,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
return timeline;
}
-int intel_timeline_pin(struct intel_timeline *tl)
+void __intel_timeline_pin(struct intel_timeline *tl)
+{
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ atomic_inc(&tl->pin_count);
+}
+
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
int err;
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
if (err)
return err;
@@ -465,7 +473,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_ggtt_pin(vma, 0, PIN_HIGH);
+ err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
@@ -484,7 +492,9 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
* free it after the current request is retired, which ensures that
* all writes into the cacheline from previous requests are complete.
*/
- err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
+ err = i915_active_ref(&tl->hwsp_cacheline->active,
+ tl->fence_context,
+ &rq->fence);
if (err)
goto err_cacheline;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 4298b9ac7327..9882cd911d8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -29,10 +29,27 @@
#include "i915_active.h"
#include "i915_syncmap.h"
-#include "gt/intel_timeline_types.h"
+#include "intel_timeline_types.h"
struct intel_timeline *
-intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
+__intel_timeline_create(struct intel_gt *gt,
+ struct i915_vma *global_hwsp,
+ unsigned int offset);
+
+static inline struct intel_timeline *
+intel_timeline_create(struct intel_gt *gt)
+{
+ return __intel_timeline_create(gt, NULL, 0);
+}
+
+static inline struct intel_timeline *
+intel_timeline_create_from_engine(struct intel_engine_cs *engine,
+ unsigned int offset)
+{
+ return __intel_timeline_create(engine->gt,
+ engine->status_page.vma,
+ offset);
+}
static inline struct intel_timeline *
intel_timeline_get(struct intel_timeline *timeline)
@@ -71,7 +88,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
}
-int intel_timeline_pin(struct intel_timeline *tl);
+void __intel_timeline_pin(struct intel_timeline *tl);
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww);
void intel_timeline_enter(struct intel_timeline *tl);
int intel_timeline_get_seqno(struct intel_timeline *tl,
struct i915_request *rq,
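
The two wrappers above capture the split introduced by this change: intel_timeline_create() leaves global_hwsp NULL so the timeline allocates its own HWSP cacheline from the per-GT allocator, while intel_timeline_create_from_engine() writes the seqno into the engine's status page at a caller-chosen offset. A minimal usage sketch, mirroring the intel_ring_submission_setup() hunk earlier in this patch (error unwinding elided; the legacy.timeline destination is only illustrative):

static int example_setup_ring_timeline(struct intel_engine_cs *engine)
{
	struct intel_timeline *tl;
	int err;

	/* Seqno lives in the engine status page at the usual HWSP offset. */
	tl = intel_timeline_create_from_engine(engine, I915_GEM_HWS_SEQNO_ADDR);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	/* No ww context exists this early, so pass NULL for the ww argument. */
	err = intel_timeline_pin(tl, NULL);
	if (err) {
		intel_timeline_put(tl);
		return err;
	}

	engine->legacy.timeline = tl;
	return 0;
}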
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index be5a4685c991..a3f72b75c61e 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2088,6 +2088,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
const struct i915_wa *wa;
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
unsigned int i;
u32 *results;
int err;
@@ -2100,29 +2101,34 @@ static int engine_wa_list_verify(struct intel_context *ce,
return PTR_ERR(vma);
intel_engine_pm_get(ce->engine);
- rq = intel_context_create_request(ce);
- intel_engine_pm_put(ce->engine);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(vma->obj, &ww);
+ if (err == 0)
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto err_pm;
+
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_vma;
+ goto err_unpin;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
- if (err) {
- i915_request_add(rq);
- goto err_vma;
- }
-
- err = wa_list_srm(rq, wal, vma);
- if (err)
- goto err_vma;
+ if (err == 0)
+ err = wa_list_srm(rq, wal, vma);
i915_request_get(rq);
+ if (err)
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
+
+ if (err)
+ goto err_rq;
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
goto err_rq;
@@ -2147,7 +2153,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
err_rq:
i915_request_put(rq);
-err_vma:
+err_unpin:
+ intel_context_unpin(ce);
+err_pm:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
i915_vma_unpin(vma);
i915_vma_put(vma);
return err;
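
The conversion above is the ww-mutex pattern this series rolls out across the driver: set up an i915_gem_ww_ctx, take every object lock and pin under it, and on -EDEADLK back off (dropping all held locks and slow-locking the contended object) before retrying. A stripped-down sketch of the same loop, using the helpers added later in this patch in i915_gem.c / i915_gem.h (the request construction is elided):

static int example_pin_under_ww(struct intel_context *ce, struct i915_vma *vma)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true); /* interruptible slow-path locking */
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err == 0)
		err = intel_context_pin_ww(ce, &ww);
	if (err == 0) {
		/* ... build and submit the request while pinned ... */
		intel_context_unpin(ce);
	}

	if (err == -EDEADLK) {
		/* Unlock everything on ww.obj_list, then sleep-lock the contended object. */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww); /* unlocks anything still held by ww */
	return err;
}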
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index b8dd3cbc8696..dfd1cfb8a7ec 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce)
{
}
+static void mock_context_post_unpin(struct intel_context *ce)
+{
+}
+
static void mock_context_destroy(struct kref *ref)
{
struct intel_context *ce = container_of(ref, typeof(*ce), ref);
@@ -152,8 +156,7 @@ static int mock_context_alloc(struct intel_context *ce)
if (!ce->ring)
return -ENOMEM;
- GEM_BUG_ON(ce->timeline);
- ce->timeline = intel_timeline_create(ce->engine->gt, NULL);
+ ce->timeline = intel_timeline_create(ce->engine->gt);
if (IS_ERR(ce->timeline)) {
kfree(ce->engine);
return PTR_ERR(ce->timeline);
@@ -164,7 +167,13 @@ static int mock_context_alloc(struct intel_context *ce)
return 0;
}
-static int mock_context_pin(struct intel_context *ce)
+static int mock_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww, void **unused)
+{
+ return 0;
+}
+
+static int mock_context_pin(struct intel_context *ce, void *unused)
{
return 0;
}
@@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce)
static const struct intel_context_ops mock_context_ops = {
.alloc = mock_context_alloc,
+ .pre_pin = mock_context_pre_pin,
.pin = mock_context_pin,
.unpin = mock_context_unpin,
+ .post_unpin = mock_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@@ -261,11 +272,12 @@ static void mock_engine_release(struct intel_engine_cs *engine)
GEM_BUG_ON(timer_pending(&mock->hw_delay));
+ intel_breadcrumbs_free(engine->breadcrumbs);
+
intel_context_unpin(engine->kernel_context);
intel_context_put(engine->kernel_context);
intel_engine_fini_retire(engine);
- intel_engine_fini_breadcrumbs(engine);
}
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -323,20 +335,26 @@ int mock_engine_init(struct intel_engine_cs *engine)
struct intel_context *ce;
intel_engine_init_active(engine, ENGINE_MOCK);
- intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
+ engine->breadcrumbs = intel_breadcrumbs_create(NULL);
+ if (!engine->breadcrumbs)
+ return -ENOMEM;
+
ce = create_kernel_context(engine);
if (IS_ERR(ce))
goto err_breadcrumbs;
+ /* We insist the kernel context is using the status_page */
+ engine->status_page.vma = ce->timeline->hwsp_ggtt;
+
engine->kernel_context = ce;
return 0;
err_breadcrumbs:
- intel_engine_fini_breadcrumbs(engine);
+ intel_breadcrumbs_free(engine->breadcrumbs);
return -ENOMEM;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 52af1cee9a94..1f4020e906a8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -68,6 +68,8 @@ static int context_sync(struct intel_context *ce)
} while (!err);
mutex_unlock(&tl->mutex);
+ /* Wait for all barriers to complete (remote CPU) before we check */
+ i915_active_unlock_wait(&ce->active);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 3fc5de961280..95d41c01d0e0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2729,7 +2729,7 @@ static int create_gang(struct intel_engine_cs *engine,
i915_gem_object_put(obj);
intel_context_put(ce);
- rq->client_link.next = &(*prev)->client_link;
+ rq->mock.link.next = &(*prev)->mock.link;
*prev = rq;
return 0;
@@ -2970,8 +2970,7 @@ static int live_preempt_gang(void *arg)
}
while (rq) { /* wait for each rq from highest to lowest prio */
- struct i915_request *n =
- list_next_entry(rq, client_link);
+ struct i915_request *n = list_next_entry(rq, mock.link);
if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
@@ -3090,7 +3089,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
return vma;
}
- err = i915_ggtt_pin(vma, 0, 0);
+ err = i915_ggtt_pin(vma, NULL, 0, 0);
if (err) {
i915_vma_put(vma);
return ERR_PTR(err);
@@ -4997,6 +4996,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
{
struct intel_context *ce;
struct i915_request *rq;
+ struct i915_gem_ww_ctx ww;
enum {
RING_START_IDX = 0,
RING_TAIL_IDX,
@@ -5011,7 +5011,11 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = intel_context_pin(ce);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(scratch->obj, &ww);
+ if (!err)
+ err = intel_context_pin_ww(ce, &ww);
if (err)
goto err_put;
@@ -5040,11 +5044,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
*cs++ = 0;
- i915_vma_lock(scratch);
err = i915_request_await_object(rq, scratch->obj, true);
if (!err)
err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(scratch);
i915_request_get(rq);
i915_request_add(rq);
@@ -5081,6 +5083,12 @@ err_rq:
err_unpin:
intel_context_unpin(ce);
err_put:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
intel_context_put(ce);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 34b403d47840..3540ba9bd459 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine,
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
- i915_gem_object_put(obj);
- return vma;
+ err = PTR_ERR(vma);
+ goto err_put;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err) {
- i915_vma_put(vma);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err_unlock;
+
+ i915_vma_lock(vma);
base = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(base)) {
- i915_gem_object_put(obj);
- return ERR_CAST(base);
+ err = PTR_ERR(base);
+ goto err_unpin;
}
cs = base;
@@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine,
*cancel = base + loop;
*counter = srm ? memset32(base + end, 0, 1) : NULL;
return vma;
+
+err_unpin:
+ i915_vma_unpin(vma);
+err_unlock:
+ i915_vma_unlock(vma);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
}
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
@@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg)
goto err_vma;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
@@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
- i915_vma_unlock(vma);
i915_request_add(rq);
if (err)
goto err_vma;
@@ -708,6 +714,7 @@ err_vma:
i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
+ i915_vma_unlock(vma);
i915_vma_put(vma);
st_engine_heartbeat_enable(engine);
@@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg)
goto err_vma;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
@@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
- i915_vma_unlock(vma);
i915_request_add(rq);
if (err)
goto err_vma;
@@ -849,6 +854,7 @@ err_vma:
i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
+ i915_vma_unlock(vma);
i915_vma_put(vma);
st_engine_heartbeat_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 6564c989dbee..96d164a3841d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -72,7 +72,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
unsigned long cacheline;
int err;
- tl = intel_timeline_create(state->gt, NULL);
+ tl = intel_timeline_create(state->gt);
if (IS_ERR(tl))
return PTR_ERR(tl);
@@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
struct i915_request *rq;
int err;
- err = intel_timeline_pin(tl);
+ err = intel_timeline_pin(tl, NULL);
if (err) {
rq = ERR_PTR(err);
goto out;
@@ -487,7 +487,7 @@ checked_intel_timeline_create(struct intel_gt *gt)
{
struct intel_timeline *tl;
- tl = intel_timeline_create(gt, NULL);
+ tl = intel_timeline_create(gt);
if (IS_ERR(tl))
return tl;
@@ -660,14 +660,14 @@ static int live_hwsp_wrap(void *arg)
* foreign GPU references.
*/
- tl = intel_timeline_create(gt, NULL);
+ tl = intel_timeline_create(gt);
if (IS_ERR(tl))
return PTR_ERR(tl);
if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
goto out_free;
- err = intel_timeline_pin(tl);
+ err = intel_timeline_pin(tl, NULL);
if (err)
goto out_free;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index febc9e6692ba..61a0532d0f3d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
return PTR_ERR(results);
err = 0;
- i915_gem_object_lock(results);
+ i915_gem_object_lock(results, NULL);
intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
i915_gem_object_unlock(results);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 861657897c0f..942c7c187adb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
goto err;
flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_ggtt_pin(vma, 0, flags);
+ ret = i915_ggtt_pin(vma, NULL, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index f1940939260a..d0a599b51bfe 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1923,6 +1923,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
if (ret)
goto err_unmap;
+ i915_gem_object_unlock(bb->obj);
INIT_LIST_HEAD(&bb->list);
list_add(&bb->list, &s->workload->shadow_bb);
@@ -2982,7 +2983,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
goto put_obj;
}
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
ret = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (ret) {
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 3c3b9842bbbd..1570eb8aa978 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -403,6 +403,14 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
wa_ctx->indirect_ctx.shadow_va = NULL;
}
+static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr)
+{
+ struct scatterlist *sg = pd->pt.base->mm.pages->sgl;
+
+ /* This is not a good idea */
+ sg->dma_address = addr;
+}
+
static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
struct intel_context *ce)
{
@@ -411,7 +419,7 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
int i = 0;
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
- px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
+ set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
} else {
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
struct i915_page_directory * const pd =
@@ -421,7 +429,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
shadow ppgtt. */
if (!pd)
break;
- px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
+
+ set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
}
}
}
@@ -1240,13 +1249,13 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
int i;
if (i915_vm_is_4lvl(&ppgtt->vm)) {
- px_dma(ppgtt->pd) = s->i915_context_pml4;
+ set_dma_address(ppgtt->pd, s->i915_context_pml4);
} else {
for (i = 0; i < GEN8_3LVL_PDPES; i++) {
struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i);
- px_dma(pd) = s->i915_context_pdps[i];
+ set_dma_address(pd, s->i915_context_pdps[i]);
}
}
}
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 1e1253207425..b0a6522be3d1 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -28,12 +28,14 @@ static struct i915_global_active {
} global;
struct active_node {
+ struct rb_node node;
struct i915_active_fence base;
struct i915_active *ref;
- struct rb_node node;
u64 timeline;
};
+#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)
+
static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
@@ -128,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { }
static void
__active_retire(struct i915_active *ref)
{
+ struct rb_root root = RB_ROOT;
struct active_node *it, *n;
- struct rb_root root;
unsigned long flags;
GEM_BUG_ON(i915_active_is_idle(ref));
@@ -141,9 +143,25 @@ __active_retire(struct i915_active *ref)
GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
debug_active_deactivate(ref);
- root = ref->tree;
- ref->tree = RB_ROOT;
- ref->cache = NULL;
+ /* Even if we have not used the cache, we may still have a barrier */
+ if (!ref->cache)
+ ref->cache = fetch_node(ref->tree.rb_node);
+
+ /* Keep the MRU cached node for reuse */
+ if (ref->cache) {
+ /* Discard all other nodes in the tree */
+ rb_erase(&ref->cache->node, &ref->tree);
+ root = ref->tree;
+
+ /* Rebuild the tree with only the cached node */
+ rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
+ rb_insert_color(&ref->cache->node, &ref->tree);
+ GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
+
+ /* Make the cached node available for reuse with any timeline */
+ if (IS_ENABLED(CONFIG_64BIT))
+ ref->cache->timeline = 0; /* needs cmpxchg(u64) */
+ }
spin_unlock_irqrestore(&ref->tree_lock, flags);
@@ -154,6 +172,7 @@ __active_retire(struct i915_active *ref)
/* ... except if you wait on it, you must manage your own references! */
wake_up_var(ref);
+ /* Finally free the discarded timeline tree */
rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
GEM_BUG_ON(i915_active_fence_isset(&it->base));
kmem_cache_free(global.slab_cache, it);
@@ -216,12 +235,11 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
active_retire(container_of(cb, struct i915_active, excl.cb));
}
-static struct i915_active_fence *
-active_instance(struct i915_active *ref, struct intel_timeline *tl)
+static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
{
- struct active_node *node, *prealloc;
- struct rb_node **p, *parent;
- u64 idx = tl->fence_context;
+ struct active_node *it;
+
+ GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */
/*
* We track the most recently used timeline to skip a rbtree search
@@ -230,8 +248,59 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
* after the previous activity has been retired, or if it matches the
* current timeline.
*/
- node = READ_ONCE(ref->cache);
- if (node && node->timeline == idx)
+ it = READ_ONCE(ref->cache);
+ if (it) {
+ u64 cached = READ_ONCE(it->timeline);
+
+ /* Once claimed, this slot will only belong to this idx */
+ if (cached == idx)
+ return it;
+
+#ifdef CONFIG_64BIT /* for cmpxchg(u64) */
+ /*
+ * An unclaimed cache [.timeline=0] can only be claimed once.
+ *
+ * If the value is already non-zero, some other thread has
+ * claimed the cache and we know that it does not match our
+ * idx. If, and only if, the timeline is currently zero is it
+ * worth competing to claim it atomically for ourselves (cmpxchg
+ * will return the old value of 0 only to the winner of that
+ * race).
+ */
+ if (!cached && !cmpxchg(&it->timeline, 0, idx))
+ return it;
+#endif
+ }
+
+ BUILD_BUG_ON(offsetof(typeof(*it), node));
+
+ /* While active, the tree can only be built; not destroyed */
+ GEM_BUG_ON(i915_active_is_idle(ref));
+
+ it = fetch_node(ref->tree.rb_node);
+ while (it) {
+ if (it->timeline < idx) {
+ it = fetch_node(it->node.rb_right);
+ } else if (it->timeline > idx) {
+ it = fetch_node(it->node.rb_left);
+ } else {
+ WRITE_ONCE(ref->cache, it);
+ break;
+ }
+ }
+
+ /* NB: If the tree rotated beneath us, we may miss our target. */
+ return it;
+}
+
+static struct i915_active_fence *
+active_instance(struct i915_active *ref, u64 idx)
+{
+ struct active_node *node, *prealloc;
+ struct rb_node **p, *parent;
+
+ node = __active_lookup(ref, idx);
+ if (likely(node))
return &node->base;
/* Preallocate a replacement, just in case */
@@ -268,10 +337,9 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
rb_insert_color(&node->node, &ref->tree);
out:
- ref->cache = node;
+ WRITE_ONCE(ref->cache, node);
spin_unlock_irq(&ref->tree_lock);
- BUILD_BUG_ON(offsetof(typeof(*node), base));
return &node->base;
}
@@ -353,69 +421,116 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
return ____active_del_barrier(ref, node, barrier_to_engine(node));
}
-int i915_active_ref(struct i915_active *ref,
- struct intel_timeline *tl,
- struct dma_fence *fence)
+static bool
+replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
+{
+ if (!is_barrier(active)) /* proto-node used by our idle barrier? */
+ return false;
+
+ /*
+ * This request is on the kernel_context timeline, and so
+ * we can use it to substitute for the pending idle-barrier
+ * request that we want to emit on the kernel_context.
+ */
+ __active_del_barrier(ref, node_from_active(active));
+ return true;
+}
+
+int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
struct i915_active_fence *active;
int err;
- lockdep_assert_held(&tl->mutex);
-
/* Prevent reaping in case we malloc/wait while building the tree */
err = i915_active_acquire(ref);
if (err)
return err;
- active = active_instance(ref, tl);
+ active = active_instance(ref, idx);
if (!active) {
err = -ENOMEM;
goto out;
}
- if (is_barrier(active)) { /* proto-node used by our idle barrier */
- /*
- * This request is on the kernel_context timeline, and so
- * we can use it to substitute for the pending idle-barrer
- * request that we want to emit on the kernel_context.
- */
- __active_del_barrier(ref, node_from_active(active));
+ if (replace_barrier(ref, active)) {
RCU_INIT_POINTER(active->fence, NULL);
atomic_dec(&ref->count);
}
if (!__i915_active_fence_set(active, fence))
- atomic_inc(&ref->count);
+ __i915_active_acquire(ref);
out:
i915_active_release(ref);
return err;
}
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+static struct dma_fence *
+__i915_active_set_fence(struct i915_active *ref,
+ struct i915_active_fence *active,
+ struct dma_fence *fence)
{
struct dma_fence *prev;
- /* We expect the caller to manage the exclusive timeline ordering */
- GEM_BUG_ON(i915_active_is_idle(ref));
+ if (replace_barrier(ref, active)) {
+ RCU_INIT_POINTER(active->fence, fence);
+ return NULL;
+ }
rcu_read_lock();
- prev = __i915_active_fence_set(&ref->excl, f);
+ prev = __i915_active_fence_set(active, fence);
if (prev)
prev = dma_fence_get_rcu(prev);
else
- atomic_inc(&ref->count);
+ __i915_active_acquire(ref);
rcu_read_unlock();
return prev;
}
+static struct i915_active_fence *
+__active_fence(struct i915_active *ref, u64 idx)
+{
+ struct active_node *it;
+
+ it = __active_lookup(ref, idx);
+ if (unlikely(!it)) { /* Contention with parallel tree builders! */
+ spin_lock_irq(&ref->tree_lock);
+ it = __active_lookup(ref, idx);
+ spin_unlock_irq(&ref->tree_lock);
+ }
+ GEM_BUG_ON(!it); /* slot must be preallocated */
+
+ return &it->base;
+}
+
+struct dma_fence *
+__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+{
+ /* Only valid while active, see i915_active_acquire_for_context() */
+ return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
+}
+
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+ /* We expect the caller to manage the exclusive timeline ordering */
+ return __i915_active_set_fence(ref, &ref->excl, f);
+}
+
bool i915_active_acquire_if_busy(struct i915_active *ref)
{
debug_active_assert(ref);
return atomic_add_unless(&ref->count, 1, 0);
}
+static void __i915_active_activate(struct i915_active *ref)
+{
+ spin_lock_irq(&ref->tree_lock); /* __active_retire() */
+ if (!atomic_fetch_inc(&ref->count))
+ debug_active_activate(ref);
+ spin_unlock_irq(&ref->tree_lock);
+}
+
int i915_active_acquire(struct i915_active *ref)
{
int err;
@@ -423,19 +538,19 @@ int i915_active_acquire(struct i915_active *ref)
if (i915_active_acquire_if_busy(ref))
return 0;
+ if (!ref->active) {
+ __i915_active_activate(ref);
+ return 0;
+ }
+
err = mutex_lock_interruptible(&ref->mutex);
if (err)
return err;
if (likely(!i915_active_acquire_if_busy(ref))) {
- if (ref->active)
- err = ref->active(ref);
- if (!err) {
- spin_lock_irq(&ref->tree_lock); /* __active_retire() */
- debug_active_activate(ref);
- atomic_inc(&ref->count);
- spin_unlock_irq(&ref->tree_lock);
- }
+ err = ref->active(ref);
+ if (!err)
+ __i915_active_activate(ref);
}
mutex_unlock(&ref->mutex);
@@ -443,6 +558,24 @@ int i915_active_acquire(struct i915_active *ref)
return err;
}
+int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
+{
+ struct i915_active_fence *active;
+ int err;
+
+ err = i915_active_acquire(ref);
+ if (err)
+ return err;
+
+ active = active_instance(ref, idx);
+ if (!active) {
+ i915_active_release(ref);
+ return -ENOMEM;
+ }
+
+ return 0; /* return with active ref */
+}
+
void i915_active_release(struct i915_active *ref)
{
debug_active_assert(ref);
@@ -651,16 +784,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
return await_active(ref, flags, sw_await_fence, fence, fence);
}
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
debug_active_fini(ref);
GEM_BUG_ON(atomic_read(&ref->count));
GEM_BUG_ON(work_pending(&ref->work));
- GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
mutex_destroy(&ref->mutex);
+
+ if (ref->cache)
+ kmem_cache_free(global.slab_cache, ref->cache);
}
-#endif
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
@@ -674,7 +807,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
if (RB_EMPTY_ROOT(&ref->tree))
return NULL;
- spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
/*
@@ -700,9 +832,9 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
prev = p;
if (node->timeline < idx)
- p = p->rb_right;
+ p = READ_ONCE(p->rb_right);
else
- p = p->rb_left;
+ p = READ_ONCE(p->rb_left);
}
/*
@@ -739,14 +871,13 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
goto match;
}
- spin_unlock_irq(&ref->tree_lock);
-
return NULL;
match:
+ spin_lock_irq(&ref->tree_lock);
rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
if (p == &ref->cache->node)
- ref->cache = NULL;
+ WRITE_ONCE(ref->cache, NULL);
spin_unlock_irq(&ref->tree_lock);
return rb_entry(p, struct active_node, node);
@@ -777,7 +908,9 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct llist_node *prev = first;
struct active_node *node;
+ rcu_read_lock();
node = reuse_idle_barrier(ref, idx);
+ rcu_read_unlock();
if (!node) {
node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
if (!node)
@@ -801,7 +934,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
*/
RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
node->base.cb.node.prev = (void *)engine;
- atomic_inc(&ref->count);
+ __i915_active_acquire(ref);
}
GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
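
With the i915_active tree now keyed by a plain u64 (typically a fence context id), __i915_active_ref() performs a lockless lookup: a READ_ONCE() walk of the rbtree, a retry under tree_lock if a concurrent insert made it miss, and an optional cmpxchg() claim of the preallocated cache slot. The slot itself must already exist, which is what i915_active_acquire_for_context() guarantees. A hedged sketch of the intended calling convention (caller-side names are illustrative):

static int example_track_fence(struct i915_active *ref, u64 idx,
			       struct dma_fence *fence)
{
	struct dma_fence *prev;
	int err;

	/* Preallocate the slot for @idx; may sleep and may return -ENOMEM. */
	err = i915_active_acquire_for_context(ref, idx);
	if (err)
		return err;

	/* Cannot fail now that the slot exists; returns the fence we displaced. */
	prev = __i915_active_ref(ref, idx, fence);
	dma_fence_put(prev); /* safe on NULL */

	i915_active_release(ref);
	return 0;
}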
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index cf4058150966..fb165d3f01cf 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -163,14 +163,16 @@ void __i915_active_init(struct i915_active *ref,
__i915_active_init(ref, active, retire, &__mkey, &__wkey); \
} while (0)
-int i915_active_ref(struct i915_active *ref,
- struct intel_timeline *tl,
- struct dma_fence *fence);
+struct dma_fence *
+__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
+int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
static inline int
i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
- return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
+ return i915_active_ref(ref,
+ i915_request_timeline(rq)->fence_context,
+ &rq->fence);
}
struct dma_fence *
@@ -198,7 +200,9 @@ int i915_request_await_active(struct i915_request *rq,
#define I915_ACTIVE_AWAIT_BARRIER BIT(2)
int i915_active_acquire(struct i915_active *ref);
+int i915_active_acquire_for_context(struct i915_active *ref, u64 idx);
bool i915_active_acquire_if_busy(struct i915_active *ref);
+
void i915_active_release(struct i915_active *ref);
static inline void __i915_active_acquire(struct i915_active *ref)
@@ -213,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref)
return !atomic_read(&ref->count);
}
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
-#else
-static inline void i915_active_fini(struct i915_active *ref) { }
-#endif
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct intel_engine_cs *engine);
@@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void);
struct i915_active *i915_active_get(struct i915_active *ref);
void i915_active_put(struct i915_active *ref);
+static inline int __i915_request_await_exclusive(struct i915_request *rq,
+ struct i915_active *active)
+{
+ struct dma_fence *fence;
+ int err = 0;
+
+ fence = i915_active_fence_get(&active->excl);
+ if (fence) {
+ err = i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+ }
+
+ return err;
+}
+
#endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e6918c7c0709..00292a849c34 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1075,6 +1075,7 @@ static void i915_driver_release(struct drm_device *dev)
intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_release(dev_priv);
+ i915_gem_drain_freed_objects(dev_priv);
i915_driver_mmio_release(dev_priv);
@@ -1119,7 +1120,6 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
struct drm_i915_file_private *file_priv = file->driver_priv;
i915_gem_context_close(file);
- i915_gem_release(dev, file);
kfree_rcu(file_priv, rcu);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1314e0e92c41..ab17084af0ff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -203,11 +203,6 @@ struct drm_i915_file_private {
struct rcu_head rcu;
};
- struct {
- spinlock_t lock;
- struct list_head request_list;
- } mm;
-
struct xarray context_xa;
struct xarray vm_xa;
@@ -593,11 +588,6 @@ struct i915_gem_mm {
atomic_t free_count;
/**
- * Small stash of WC pages
- */
- struct pagestash wc_stash;
-
- /**
* tmpfs instance used for shmem backed objects
*/
struct vfsmount *gemfs;
@@ -1826,11 +1816,18 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915)
}
struct i915_vma * __must_check
+i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ const struct i915_ggtt_view *view,
+ u64 size, u64 alignment, u64 flags);
+
+static inline struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
- u64 size,
- u64 alignment,
- u64 flags);
+ u64 size, u64 alignment, u64 flags)
+{
+ return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags);
+}
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
@@ -1867,7 +1864,6 @@ void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
void i915_gem_resume(struct drm_i915_private *dev_priv);
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
-void i915_gem_release(struct drm_device *dev, struct drm_file *file);
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9aa3066cb75d..bb0c12975f38 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -335,12 +335,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
u64 remain;
int ret;
- ret = i915_gem_object_prepare_read(obj, &needs_clflush);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
+ ret = i915_gem_object_prepare_read(obj, &needs_clflush);
+ if (ret) {
+ i915_gem_object_unlock(obj);
+ return ret;
+ }
+
fence = i915_gem_object_lock_fence(obj);
i915_gem_object_finish_access(obj);
+ i915_gem_object_unlock(obj);
+
if (!fence)
return -ENOMEM;
@@ -420,7 +428,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}
- ret = i915_gem_object_lock_interruptible(obj);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out_unpin;
@@ -619,7 +627,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}
- ret = i915_gem_object_lock_interruptible(obj);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out_unpin;
@@ -734,12 +742,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
u64 remain;
int ret;
- ret = i915_gem_object_prepare_write(obj, &needs_clflush);
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
+ ret = i915_gem_object_prepare_write(obj, &needs_clflush);
+ if (ret) {
+ i915_gem_object_unlock(obj);
+ return ret;
+ }
+
fence = i915_gem_object_lock_fence(obj);
i915_gem_object_finish_access(obj);
+ i915_gem_object_unlock(obj);
+
if (!fence)
return -ENOMEM;
@@ -946,11 +962,10 @@ static void discard_ggtt_vma(struct i915_vma *vma)
}
struct i915_vma *
-i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
- const struct i915_ggtt_view *view,
- u64 size,
- u64 alignment,
- u64 flags)
+i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ const struct i915_ggtt_view *view,
+ u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt;
@@ -1016,7 +1031,7 @@ new_vma:
return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
if (ret)
return ERR_PTR(ret);
@@ -1290,7 +1305,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
i915_gem_drain_freed_objects(i915);
list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
drm_WARN_ON(&i915->drm,
i915_gem_object_set_to_cpu_domain(obj, true));
i915_gem_object_unlock(obj);
@@ -1301,21 +1316,6 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
return 0;
}
-void i915_gem_release(struct drm_device *dev, struct drm_file *file)
-{
- struct drm_i915_file_private *file_priv = file->driver_priv;
- struct i915_request *request;
-
- /* Clean up our request list when the client is going away, so that
- * later retire_requests won't dereference our soon-to-be-gone
- * file_priv.
- */
- spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_link)
- request->file_priv = NULL;
- spin_unlock(&file_priv->mm.lock);
-}
-
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
struct drm_i915_file_private *file_priv;
@@ -1331,9 +1331,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
file_priv->dev_priv = i915;
file_priv->file = file;
- spin_lock_init(&file_priv->mm.lock);
- INIT_LIST_HEAD(&file_priv->mm.request_list);
-
file_priv->bsd_engine = -1;
file_priv->hang_timestamp = jiffies;
@@ -1344,6 +1341,58 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
return ret;
}
+void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
+{
+ ww_acquire_init(&ww->ctx, &reservation_ww_class);
+ INIT_LIST_HEAD(&ww->obj_list);
+ ww->intr = intr;
+ ww->contended = NULL;
+}
+
+static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
+{
+ struct drm_i915_gem_object *obj;
+
+ while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
+ list_del(&obj->obj_link);
+ i915_gem_object_unlock(obj);
+ }
+}
+
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
+{
+ list_del(&obj->obj_link);
+ i915_gem_object_unlock(obj);
+}
+
+void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
+{
+ i915_gem_ww_ctx_unlock_all(ww);
+ WARN_ON(ww->contended);
+ ww_acquire_fini(&ww->ctx);
+}
+
+int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
+{
+ int ret = 0;
+
+ if (WARN_ON(!ww->contended))
+ return -EINVAL;
+
+ i915_gem_ww_ctx_unlock_all(ww);
+ if (ww->intr)
+ ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
+ else
+ dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
+
+ if (!ret)
+ list_add_tail(&ww->contended->obj_link, &ww->obj_list);
+
+ ww->contended = NULL;
+
+ return ret;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index f333e88a2b6e..a4cad3f154ca 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -116,4 +116,16 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
return test_bit(TASKLET_STATE_SCHED, &t->state);
}
+struct i915_gem_ww_ctx {
+ struct ww_acquire_ctx ctx;
+ struct list_head obj_list;
+ bool intr;
+ struct drm_i915_gem_object *contended;
+};
+
+void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
+void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
+int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
+
#endif /* __I915_GEM_H__ */
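
The i915_gem_ww_ctx above bundles the state needed to lock several GEM objects against each other: obj_list collects every object currently locked under the context, and contended records the object whose lock attempt returned -EDEADLK so that i915_gem_ww_ctx_backoff() can drop the rest and take it with the slow path. A sketch for locking two objects together, assuming the two-argument i915_gem_object_lock(obj, &ww) introduced by this patch adds the object to ww.obj_list on success and treats relocking an object already held by the same context as a no-op:

static int example_lock_pair(struct drm_i915_gem_object *a,
			     struct drm_i915_gem_object *b)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(a, &ww);
	if (!err)
		err = i915_gem_object_lock(b, &ww);
	if (!err) {
		/* ... both objects locked: copy, bind, clear, etc. ... */
	}

	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry; /* the contended object is already relocked */
	}
	i915_gem_ww_ctx_fini(&ww); /* unlocks whatever remains on obj_list */
	return err;
}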
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1fa67700d8f4..f113fe44572b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -41,6 +41,7 @@
#include "display/intel_lpe_audio.h"
#include "display/intel_psr.h"
+#include "gt/intel_breadcrumbs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c6f6370283cf..e94976976571 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1195,24 +1195,39 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
- int err;
+ struct i915_gem_ww_ctx ww;
+ int err = -ENODEV;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
if (ce->engine != stream->engine) /* first match! */
continue;
- /*
- * As the ID is the gtt offset of the context's vma we
- * pin the vma to ensure the ID remains fixed.
- */
- err = intel_context_pin(ce);
- if (err == 0) {
- stream->pinned_ctx = ce;
- break;
- }
+ err = 0;
+ break;
}
i915_gem_context_unlock_engines(ctx);
+ if (err)
+ return ERR_PTR(err);
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ /*
+ * As the ID is the gtt offset of the context's vma we
+ * pin the vma to ensure the ID remains fixed.
+ */
+ err = intel_context_pin_ww(ce, &ww);
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ if (err)
+ return ERR_PTR(err);
+
+ stream->pinned_ctx = ce;
return stream->pinned_ctx;
}
@@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream,
{
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
int err;
vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = i915_gem_object_lock(vma->obj, &ww);
+ if (err)
+ goto err;
+
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
- goto err_vma_put;
+ goto err;
intel_engine_pm_get(ce->engine);
rq = i915_request_create(ce);
@@ -1953,11 +1975,9 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_add_request;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
- i915_vma_unlock(vma);
if (err)
goto err_add_request;
@@ -1971,7 +1991,14 @@ err_add_request:
i915_request_add(rq);
err_vma_unpin:
i915_vma_unpin(vma);
-err_vma_put:
+err:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+
+ i915_gem_ww_ctx_fini(&ww);
i915_vma_put(vma);
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7a05850ca931..11e272422fb7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,6 +31,7 @@
#include <linux/sched/signal.h>
#include "gem/i915_gem_context.h"
+#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"
@@ -186,48 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
irq_execute_cb(wrk);
}
-static void __notify_execute_cb(struct i915_request *rq)
+static __always_inline void
+__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
{
struct execute_cb *cb, *cn;
- lockdep_assert_held(&rq->lock);
-
- GEM_BUG_ON(!i915_request_is_active(rq));
if (llist_empty(&rq->execute_cb))
return;
- llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
- irq_work_queue(&cb->work);
-
- /*
- * XXX Rollback on __i915_request_unsubmit()
- *
- * In the future, perhaps when we have an active time-slicing scheduler,
- * it will be interesting to unsubmit parallel execution and remove
- * busywaits from the GPU until their master is restarted. This is
- * quite hairy, we have to carefully rollback the fence and do a
- * preempt-to-idle cycle on the target engine, all the while the
- * master execute_cb may refire.
- */
- init_llist_head(&rq->execute_cb);
+ llist_for_each_entry_safe(cb, cn,
+ llist_del_all(&rq->execute_cb),
+ work.llnode)
+ fn(&cb->work);
}
-static inline void
-remove_from_client(struct i915_request *request)
+static void __notify_execute_cb_irq(struct i915_request *rq)
{
- struct drm_i915_file_private *file_priv;
+ __notify_execute_cb(rq, irq_work_queue);
+}
- if (!READ_ONCE(request->file_priv))
- return;
+static bool irq_work_imm(struct irq_work *wrk)
+{
+ wrk->func(wrk);
+ return false;
+}
- rcu_read_lock();
- file_priv = xchg(&request->file_priv, NULL);
- if (file_priv) {
- spin_lock(&file_priv->mm.lock);
- list_del(&request->client_link);
- spin_unlock(&file_priv->mm.lock);
- }
- rcu_read_unlock();
+static void __notify_execute_cb_imm(struct i915_request *rq)
+{
+ __notify_execute_cb(rq, irq_work_imm);
}
static void free_capture_list(struct i915_request *request)
@@ -274,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq)
locked = engine;
}
list_del_init(&rq->sched.link);
+
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+
+ /* Prevent further __await_execution() registering a cb, then flush */
+ set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+
spin_unlock_irq(&locked->active.lock);
+
+ __notify_execute_cb_imm(rq);
}
bool i915_request_retire(struct i915_request *rq)
@@ -288,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
trace_i915_request_retire(rq);
+ i915_request_mark_complete(rq);
/*
* We know the GPU must have read the request to have
@@ -305,32 +300,30 @@ bool i915_request_retire(struct i915_request *rq)
__i915_request_fill(rq, POISON_FREE);
rq->ring->head = rq->postfix;
+ if (!i915_request_signaled(rq)) {
+ spin_lock_irq(&rq->lock);
+ dma_fence_signal_locked(&rq->fence);
+ spin_unlock_irq(&rq->lock);
+ }
+
+ if (i915_request_has_waitboost(rq)) {
+ GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
+ atomic_dec(&rq->engine->gt->rps.num_waiters);
+ }
+
/*
* We only loosely track inflight requests across preemption,
* and so we may find ourselves attempting to retire a _completed_
* request that we have removed from the HW and put back on a run
* queue.
+ *
+ * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
+ * after removing the breadcrumb and signaling it, so that we do not
+ * inadvertently attach the breadcrumb to a completed request.
*/
remove_from_engine(rq);
-
- spin_lock_irq(&rq->lock);
- i915_request_mark_complete(rq);
- if (!i915_request_signaled(rq))
- dma_fence_signal_locked(&rq->fence);
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
- i915_request_cancel_breadcrumb(rq);
- if (i915_request_has_waitboost(rq)) {
- GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
- atomic_dec(&rq->engine->gt->rps.num_waiters);
- }
- if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
- set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
- __notify_execute_cb(rq);
- }
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
- spin_unlock_irq(&rq->lock);
- remove_from_client(rq);
__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
intel_context_exit(rq->context);
@@ -357,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq)
} while (i915_request_retire(tmp) && tmp != rq);
}
-static void __llist_add(struct llist_node *node, struct llist_head *head)
-{
- node->next = head->first;
- head->first = node;
-}
-
static struct i915_request * const *
__engine_active(struct intel_engine_cs *engine)
{
@@ -388,17 +375,38 @@ static bool __request_in_flight(const struct i915_request *signal)
* As we know that there are always preemption points between
* requests, we know that only the currently executing request
* may be still active even though we have cleared the flag.
- * However, we can't rely on our tracking of ELSP[0] to known
+ * However, we can't rely on our tracking of ELSP[0] to know
* which request is currently active and so maybe stuck, as
* the tracking maybe an event behind. Instead assume that
* if the context is still inflight, then it is still active
* even if the active flag has been cleared.
+ *
+ * To further complicate matters, if there is a pending promotion, the HW
+ * may either perform a context switch to the second inflight execlists,
+ * or it may switch to the pending set of execlists. In the case of the
+ * latter, it may send the ACK and we process the event copying the
+ * pending[] over top of inflight[], _overwriting_ our *active. Since
+ * this implies the HW is arbitrating and not struck in *active, we do
+ * not worry about complete accuracy, but we do require no read/write
+ * tearing of the pointer [the read of the pointer must be valid, even
+ * as the array is being overwritten, for which we require the writes
+ * to avoid tearing.]
+ *
+ * Note that the read of *execlists->active may race with the promotion
+ * of execlists->pending[] to execlists->inflight[], overwriting
+ * the value at *execlists->active. This is fine. The promotion implies
+ * that we received an ACK from the HW, and so the context is not
+ * stuck -- if we do not see ourselves in *active, the inflight status
+ * is valid. If instead we see ourselves being copied into *active,
+ * we are inflight and may signal the callback.
*/
if (!intel_context_inflight(signal->context))
return false;
rcu_read_lock();
- for (port = __engine_active(signal->engine); (rq = *port); port++) {
+ for (port = __engine_active(signal->engine);
+ (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
+ port++) {
if (rq->context == signal->context) {
inflight = i915_seqno_passed(rq->fence.seqno,
signal->fence.seqno);
@@ -439,18 +447,24 @@ __await_execution(struct i915_request *rq,
cb->work.func = irq_execute_cb_hook;
}
- spin_lock_irq(&signal->lock);
- if (i915_request_is_active(signal) || __request_in_flight(signal)) {
- if (hook) {
- hook(rq, &signal->fence);
- i915_request_put(signal);
- }
- i915_sw_fence_complete(cb->fence);
- kmem_cache_free(global.slab_execute_cbs, cb);
- } else {
- __llist_add(&cb->work.llnode, &signal->execute_cb);
+ /*
+ * Register the callback first, then see if the signaler is already
+ * active. This ensures that if we race with the
+ * __notify_execute_cb from i915_request_submit() and we are not
+ * included in that list, we get a second bite of the cherry and
+ * execute it ourselves. After this point, a future
+ * i915_request_submit() will notify us.
+ *
+ * In i915_request_retire() we set the ACTIVE bit on a completed
+ * request (then flush the execute_cb). So by registering the
+ * callback first, then checking the ACTIVE bit, we serialise with
+ * the completed/retired request.
+ */
+ if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
+ if (i915_request_is_active(signal) ||
+ __request_in_flight(signal))
+ __notify_execute_cb_imm(signal);
}
- spin_unlock_irq(&signal->lock);
return 0;
}
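
Registering first and checking afterwards is what makes the execute_cb handling lock-free: the waiter publishes its callback on the llist and only then looks at the signaler's state, while the submit/retire side sets I915_FENCE_FLAG_ACTIVE before draining the llist. Whichever side loses the race still sees the other's write, so each callback runs exactly once. A compressed sketch of the two halves, reusing the helpers from this patch (not the literal driver code):

/* Waiter side: publish, then re-check the signaler. */
static void example_await(struct i915_request *signal, struct execute_cb *cb)
{
	if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
		/*
		 * If the signaler went active (and drained the list) before
		 * our node was added, nobody else will run it: flush it now.
		 */
		if (i915_request_is_active(signal))
			__notify_execute_cb_imm(signal);
	}
}

/* Signaler side: mark active first, then drain whatever was published. */
static void example_submit(struct i915_request *rq)
{
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
	__notify_execute_cb_irq(rq); /* llist_del_all() + irq_work_queue() */
}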
@@ -566,18 +580,28 @@ xfer:
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
}
+ /*
+ * XXX Rollback bonded-execution on __i915_request_unsubmit()?
+ *
+ * In the future, perhaps when we have an active time-slicing scheduler,
+ * it will be interesting to unsubmit parallel execution and remove
+ * busywaits from the GPU until their master is restarted. This is
+ * quite hairy, we have to carefully rollback the fence and do a
+ * preempt-to-idle cycle on the target engine, all the while the
+ * master execute_cb may refire.
+ */
+ __notify_execute_cb_irq(request);
+
/* We may be recursing from the signal callback of another i915 fence */
if (!i915_request_signaled(request)) {
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
- __notify_execute_cb(request);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&request->fence.flags) &&
!i915_request_enable_breadcrumb(request))
intel_engine_signal_breadcrumbs(engine);
spin_unlock(&request->lock);
- GEM_BUG_ON(!llist_empty(&request->execute_cb));
}
return result;
@@ -600,27 +624,27 @@ void __i915_request_unsubmit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
+ /*
+ * Only unwind in reverse order, required so that the per-context list
+ * is kept in seqno/ring order.
+ */
RQ_TRACE(request, "\n");
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);
/*
- * Only unwind in reverse order, required so that the per-context list
- * is kept in seqno/ring order.
+ * Before we remove this breadcrumb from the signal list, we have
+ * to ensure that a concurrent dma_fence_enable_signaling() does not
+ * attach itself. We first mark the request as no longer active and
+ * make sure that is visible to other cores, and then remove the
+ * breadcrumb if attached.
*/
-
- /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+ clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
i915_request_cancel_breadcrumb(request);
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
- clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
-
- spin_unlock(&request->lock);
-
/* We've already spun, don't charge on resubmitting. */
if (request->sched.semaphores && i915_request_started(request))
request->sched.semaphores = 0;
@@ -757,7 +781,6 @@ static void __i915_request_ctor(void *arg)
dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
- rq->file_priv = NULL;
rq->capture_list = NULL;
init_llist_head(&rq->execute_cb);
@@ -847,7 +870,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
/* No zalloc, everything must be cleared after use */
rq->batch = NULL;
- GEM_BUG_ON(rq->file_priv);
GEM_BUG_ON(rq->capture_list);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
@@ -1640,7 +1662,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
return this_cpu != cpu;
}
-static bool __i915_spin_request(const struct i915_request * const rq, int state)
+static bool __i915_spin_request(struct i915_request * const rq, int state)
{
unsigned long timeout_ns;
unsigned int cpu;
@@ -1673,7 +1695,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state)
timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
timeout_ns += local_clock_ns(&cpu);
do {
- if (i915_request_completed(rq))
+ if (dma_fence_is_signaled(&rq->fence))
return true;
if (signal_pending_state(state, current))
@@ -1697,7 +1719,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct request_wait *wait = container_of(cb, typeof(*wait), cb);
- wake_up_process(wait->tsk);
+ wake_up_process(fetch_and_zero(&wait->tsk));
}
/**
@@ -1766,10 +1788,8 @@ long i915_request_wait(struct i915_request *rq,
* duration, which we currently lack.
*/
if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
- __i915_spin_request(rq, state)) {
- dma_fence_signal(&rq->fence);
+ __i915_spin_request(rq, state))
goto out;
- }
/*
* This client is about to stall waiting for the GPU. In many cases
@@ -1790,15 +1810,29 @@ long i915_request_wait(struct i915_request *rq,
if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
goto out;
+ /*
+ * Flush the submission tasklet, but only if it may help this request.
+ *
+ * We sometimes experience some latency between the HW interrupts and
+ * tasklet execution (mostly due to ksoftirqd latency, but it can also
+ * be due to lazy CS events), so let's run the tasklet manually if there
+ * is a chance it may submit this request. If the request is not ready
+ * to run, as it is waiting for other fences to be signaled, flushing
+ * the tasklet is busy work without any advantage for this client.
+ *
+ * If the HW is being lazy, this is the last chance before we go to
+ * sleep to catch any pending events. We will check periodically in
+ * the heartbeat to flush the submission tasklets as a last resort
+ * for unhappy HW.
+ */
+ if (i915_request_is_ready(rq))
+ intel_engine_flush_submission(rq->engine);
+
for (;;) {
set_current_state(state);
- if (i915_request_completed(rq)) {
- dma_fence_signal(&rq->fence);
+ if (dma_fence_is_signaled(&rq->fence))
break;
- }
-
- intel_engine_flush_submission(rq->engine);
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
@@ -1814,7 +1848,9 @@ long i915_request_wait(struct i915_request *rq,
}
__set_current_state(TASK_RUNNING);
- dma_fence_remove_callback(&rq->fence, &wait.cb);
+ if (READ_ONCE(wait.tsk))
+ dma_fence_remove_callback(&rq->fence, &wait.cb);
+ GEM_BUG_ON(!list_empty(&wait.cb.node));
out:
mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
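
For reference, a minimal sketch of how a waiter calls the reworked path above, based on the i915_request_wait() prototype shown in the header hunk below; the flag choice and error handling are illustrative, not lifted from a specific call site:

	long remaining;

	/* Sleep (interruptibly) until rq signals or the timeout expires. */
	remaining = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT);
	if (remaining < 0)
		return remaining;	/* e.g. -ERESTARTSYS, as in the loop above */
	/* Otherwise the request is signaled; the leftover timeout is returned. */

With the change above, the submission tasklet is flushed on behalf of such a waiter only when i915_request_is_ready(), so a request still blocked on other fences no longer causes busy work.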
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 590762820761..16b721080195 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -284,10 +284,6 @@ struct i915_request {
/** timeline->request entry for this request */
struct list_head link;
- struct drm_i915_file_private *file_priv;
- /** file_priv list entry for this request */
- struct list_head client_link;
-
I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;
@@ -365,10 +361,6 @@ void i915_request_submit(struct i915_request *request);
void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);
-/* Note: part of the intel_breadcrumbs family */
-bool i915_request_enable_breadcrumb(struct i915_request *request);
-void i915_request_cancel_breadcrumb(struct i915_request *request);
-
long i915_request_wait(struct i915_request *rq,
unsigned int flags,
long timeout)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 295b9829e2da..4cd2038cbe35 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -164,9 +164,13 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
do {
list_for_each_entry_safe(pos, next, &x->head, entry) {
- pos->func(pos,
- TASK_NORMAL, fence->error,
- &extra);
+ int wake_flags;
+
+ wake_flags = fence->error;
+ if (pos->func == autoremove_wake_function)
+ wake_flags = 0;
+
+ pos->func(pos, TASK_NORMAL, wake_flags, &extra);
}
if (list_empty(&extra))
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index bc64f773dcdb..495d28f6d160 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -291,6 +291,8 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
struct i915_vma_work {
struct dma_fence_work base;
+ struct i915_address_space *vm;
+ struct i915_vm_pt_stash stash;
struct i915_vma *vma;
struct drm_i915_gem_object *pinned;
struct i915_sw_dma_fence_cb cb;
@@ -302,13 +304,10 @@ static int __vma_bind(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
struct i915_vma *vma = vw->vma;
- int err;
-
- err = vma->ops->bind_vma(vma->vm, vma, vw->cache_level, vw->flags);
- if (err)
- atomic_or(I915_VMA_ERROR, &vma->flags);
- return err;
+ vma->ops->bind_vma(vw->vm, &vw->stash,
+ vma, vw->cache_level, vw->flags);
+ return 0;
}
static void __vma_release(struct dma_fence_work *work)
@@ -317,6 +316,9 @@ static void __vma_release(struct dma_fence_work *work)
if (vw->pinned)
__i915_gem_object_unpin_pages(vw->pinned);
+
+ i915_vm_free_pt_stash(vw->vm, &vw->stash);
+ i915_vm_put(vw->vm);
}
static const struct dma_fence_work_ops bind_ops = {
@@ -376,7 +378,6 @@ int i915_vma_bind(struct i915_vma *vma,
{
u32 bind_flags;
u32 vma_flags;
- int ret;
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(vma->size > vma->node.size);
@@ -433,9 +434,7 @@ int i915_vma_bind(struct i915_vma *vma,
work->pinned = vma->obj;
}
} else {
- ret = vma->ops->bind_vma(vma->vm, vma, cache_level, bind_flags);
- if (ret)
- return ret;
+ vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
atomic_or(bind_flags, &vma->flags);
@@ -853,13 +852,19 @@ static void vma_unbind_pages(struct i915_vma *vma)
__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
}
-int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u64 size, u64 alignment, u64 flags)
{
struct i915_vma_work *work = NULL;
intel_wakeref_t wakeref = 0;
unsigned int bound;
int err;
+#ifdef CONFIG_PROVE_LOCKING
+ if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
+ WARN_ON(!ww);
+#endif
+
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
@@ -873,16 +878,30 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (err)
return err;
+ if (flags & PIN_GLOBAL)
+ wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+
if (flags & vma->vm->bind_async_flags) {
work = i915_vma_work();
if (!work) {
err = -ENOMEM;
- goto err_pages;
+ goto err_rpm;
}
- }
- if (flags & PIN_GLOBAL)
- wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+ work->vm = i915_vm_get(vma->vm);
+
+ /* Allocate enough page directories to cover the PTEs used */
+ if (vma->vm->allocate_va_range) {
+ err = i915_vm_alloc_pt_stash(vma->vm,
+ &work->stash,
+ vma->size);
+ if (err)
+ goto err_fence;
+
+ err = i915_vm_pin_pt_stash(vma->vm,
+ &work->stash);
+ if (err)
+ goto err_fence;
+ }
+ }
/*
* Differentiate between user/kernel vma inside the aliasing-ppgtt.
@@ -971,9 +990,9 @@ err_unlock:
err_fence:
if (work)
dma_fence_work_commit_imm(&work->base);
+err_rpm:
if (wakeref)
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
-err_pages:
vma_put_pages(vma);
return err;
}
@@ -989,7 +1008,8 @@ static void flush_idle_contexts(struct intel_gt *gt)
intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}
-int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
+int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u32 align, unsigned int flags)
{
struct i915_address_space *vm = vma->vm;
int err;
@@ -997,7 +1017,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
do {
- err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
+ err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
if (err != -ENOSPC) {
if (!err) {
err = i915_vma_wait_for_bind(vma);
@@ -1167,6 +1187,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
list_del(&vma->obj->userfault_link);
}
+static int
+__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
+{
+ return __i915_request_await_exclusive(rq, &vma->active);
+}
+
int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
@@ -1174,8 +1200,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
GEM_BUG_ON(!i915_vma_is_pinned(vma));
/* Wait for the vma to be bound before we start! */
- err = i915_request_await_active(rq, &vma->active,
- I915_ACTIVE_AWAIT_EXCL);
+ err = __i915_request_await_bind(rq, vma);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index d0d01f909548..5b3a3c653454 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -237,8 +237,17 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
}
int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
-int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
+i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u64 size, u64 alignment, u64 flags);
+
+static inline int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+ return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
+}
+
+int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u32 align, unsigned int flags);
static inline int i915_vma_pin_count(const struct i915_vma *vma)
{
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 88d400b9df88..23a6132c5f4e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -199,11 +199,52 @@ out:
return err;
}
+static int igt_gem_ww_ctx(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj, *obj2;
+ struct i915_gem_ww_ctx ww;
+ int err = 0;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj2)) {
+ err = PTR_ERR(obj2);
+ goto put1;
+ }
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ /* Lock the objects, twice for good measure (-EALREADY handling) */
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_lock_interruptible(obj, &ww);
+ if (!err)
+ err = i915_gem_object_lock_interruptible(obj2, &ww);
+ if (!err)
+ err = i915_gem_object_lock(obj2, &ww);
+
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ i915_gem_object_put(obj2);
+put1:
+ i915_gem_object_put(obj);
+ return err;
+}
+
int i915_gem_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_gem_suspend),
SUBTEST(igt_gem_hibernate),
+ SUBTEST(igt_gem_ww_ctx),
};
if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 0016ffc7d914..af8205a2bd8f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -172,35 +172,45 @@ static int igt_ppgtt_alloc(void *arg)
/* Check we can allocate the entire range */
for (size = 4096; size <= limit; size <<= 2) {
- err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size);
+ struct i915_vm_pt_stash stash = {};
+
+ err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size);
+ if (err)
+ goto err_ppgtt_cleanup;
+
+ err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err) {
- if (err == -ENOMEM) {
- pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n",
- size, ilog2(size));
- err = 0; /* virtual space too large! */
- }
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
goto err_ppgtt_cleanup;
}
+ ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, size);
cond_resched();
ppgtt->vm.clear_range(&ppgtt->vm, 0, size);
+
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
}
/* Check we can incrementally allocate the entire range */
for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) {
- err = ppgtt->vm.allocate_va_range(&ppgtt->vm,
- last, size - last);
+ struct i915_vm_pt_stash stash = {};
+
+ err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size - last);
+ if (err)
+ goto err_ppgtt_cleanup;
+
+ err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err) {
- if (err == -ENOMEM) {
- pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n",
- last, size - last, ilog2(size));
- err = 0; /* virtual space too large! */
- }
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
goto err_ppgtt_cleanup;
}
+ ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash,
+ last, size - last);
cond_resched();
+
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
}
err_ppgtt_cleanup:
@@ -284,9 +294,23 @@ static int lowlevel_hole(struct i915_address_space *vm,
break;
}
- if (vm->allocate_va_range &&
- vm->allocate_va_range(vm, addr, BIT_ULL(size)))
- break;
+ if (vm->allocate_va_range) {
+ struct i915_vm_pt_stash stash = {};
+
+ if (i915_vm_alloc_pt_stash(vm, &stash,
+ BIT_ULL(size)))
+ break;
+
+ if (i915_vm_pin_pt_stash(vm, &stash)) {
+ i915_vm_free_pt_stash(vm, &stash);
+ break;
+ }
+
+ vm->allocate_va_range(vm, &stash,
+ addr, BIT_ULL(size));
+
+ i915_vm_free_pt_stash(vm, &stash);
+ }
mock_vma->pages = obj->mm.pages;
mock_vma->node.size = BIT_ULL(size);
@@ -1881,6 +1905,7 @@ static int igt_cs_tlb(void *arg)
continue;
while (!__igt_timeout(end_time, NULL)) {
+ struct i915_vm_pt_stash stash = {};
struct i915_request *rq;
u64 offset;
@@ -1888,10 +1913,6 @@ static int igt_cs_tlb(void *arg)
0, vm->total - PAGE_SIZE,
chunk_size, PAGE_SIZE);
- err = vm->allocate_va_range(vm, offset, chunk_size);
- if (err)
- goto end;
-
memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
vma = i915_vma_instance(bbe, vm, NULL);
@@ -1904,6 +1925,20 @@ static int igt_cs_tlb(void *arg)
if (err)
goto end;
+ err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size);
+ if (err)
+ goto end;
+
+ err = i915_vm_pin_pt_stash(vm, &stash);
+ if (err) {
+ i915_vm_free_pt_stash(vm, &stash);
+ goto end;
+ }
+
+ vm->allocate_va_range(vm, &stash, offset, chunk_size);
+
+ i915_vm_free_pt_stash(vm, &stash);
+
/* Prime the TLB with the dummy pages */
for (i = 0; i < count; i++) {
vma->node.start = offset + i * PAGE_SIZE;
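
As the selftest hunks above illustrate, allocate_va_range() no longer allocates memory itself and cannot fail: callers pre-allocate a page-table stash, pin it, let allocate_va_range() consume what it needs, then free the remainder. A condensed sketch of that lifecycle, using only the calls shown in this diff (error handling abbreviated):

	struct i915_vm_pt_stash stash = {};
	int err;

	err = i915_vm_alloc_pt_stash(vm, &stash, size);	/* reserve PD/PT objects */
	if (err)
		return err;

	err = i915_vm_pin_pt_stash(vm, &stash);		/* back them with pages */
	if (err) {
		i915_vm_free_pt_stash(vm, &stash);
		return err;
	}

	vm->allocate_va_range(vm, &stash, start, size);	/* consumes from the stash */

	i915_vm_free_pt_stash(vm, &stash);		/* drop whatever was not used */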
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
index c2d001d9c0ec..debbac660519 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg)
}
/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
- scratch = kmap(ce->vm->scratch[0].base.page);
+ scratch = kmap(__px_page(ce->vm->scratch[0]));
memset(scratch, POISON_FREE, PAGE_SIZE);
rq = intel_context_create_request(ce);
@@ -405,7 +405,7 @@ static int live_noa_gpr(void *arg)
out_rq:
i915_request_put(rq);
out_ce:
- kunmap(ce->vm->scratch[0].base.page);
+ kunmap(__px_page(ce->vm->scratch[0]));
intel_context_put(ce);
out:
stream_destroy(stream);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index c1dcd4b91bda..3092ca763789 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -862,6 +862,8 @@ static int live_all_engines(void *arg)
goto out_free;
}
+ i915_vma_lock(batch);
+
idx = 0;
for_each_uabi_engine(engine, i915) {
request[idx] = intel_engine_create_kernel_request(engine);
@@ -872,11 +874,9 @@ static int live_all_engines(void *arg)
goto out_request;
}
- i915_vma_lock(batch);
err = i915_request_await_object(request[idx], batch->obj, 0);
if (err == 0)
err = i915_vma_move_to_active(batch, request[idx], 0);
- i915_vma_unlock(batch);
GEM_BUG_ON(err);
err = engine->emit_bb_start(request[idx],
@@ -891,6 +891,8 @@ static int live_all_engines(void *arg)
idx++;
}
+ i915_vma_unlock(batch);
+
idx = 0;
for_each_uabi_engine(engine, i915) {
if (i915_request_completed(request[idx])) {
@@ -981,12 +983,13 @@ static int live_sequential_engines(void *arg)
goto out_free;
}
+ i915_vma_lock(batch);
request[idx] = intel_engine_create_kernel_request(engine);
if (IS_ERR(request[idx])) {
err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed for %s with err=%d\n",
__func__, engine->name, err);
- goto out_request;
+ goto out_unlock;
}
if (prev) {
@@ -996,16 +999,14 @@ static int live_sequential_engines(void *arg)
i915_request_add(request[idx]);
pr_err("%s: Request await failed for %s with err=%d\n",
__func__, engine->name, err);
- goto out_request;
+ goto out_unlock;
}
}
- i915_vma_lock(batch);
err = i915_request_await_object(request[idx],
batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, request[idx], 0);
- i915_vma_unlock(batch);
GEM_BUG_ON(err);
err = engine->emit_bb_start(request[idx],
@@ -1020,6 +1021,11 @@ static int live_sequential_engines(void *arg)
prev = request[idx];
idx++;
+
+out_unlock:
+ i915_vma_unlock(batch);
+ if (err)
+ goto out_request;
}
idx = 0;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index af89c7fc8f59..88c5e9acb84c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
unsigned int x, y;
int err;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 6e80d99048e4..93a38a323584 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
if (err)
goto out_unpin;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_wc_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)
@@ -522,9 +522,9 @@ static int igt_lmem_write_cpu(void *arg)
goto out_unpin;
}
- /* We want to throw in a random width/align */
- bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32),
- sizeof(u32));
+ /* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
+ bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
+ GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));
i = 0;
do {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index b173086411ef..7270fc8ca801 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -38,14 +38,14 @@ static void mock_insert_entries(struct i915_address_space *vm,
{
}
-static int mock_bind_ppgtt(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void mock_bind_ppgtt(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma));
- return 0;
}
static void mock_unbind_ppgtt(struct i915_address_space *vm,
@@ -74,9 +74,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
ppgtt->vm.i915 = i915;
ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
ppgtt->vm.file = ERR_PTR(-ENODEV);
+ ppgtt->vm.dma = &i915->drm.pdev->dev;
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
+ ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ppgtt->vm.clear_range = mock_clear_range;
ppgtt->vm.insert_page = mock_insert_page;
ppgtt->vm.insert_entries = mock_insert_entries;
@@ -90,13 +93,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
return ppgtt;
}
-static int mock_bind_ggtt(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void mock_bind_ggtt(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
- atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
- return 0;
}
static void mock_unbind_ggtt(struct i915_address_space *vm,
@@ -116,6 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
ggtt->mappable_end = resource_size(&ggtt->gmadr);
ggtt->vm.total = 4096 * PAGE_SIZE;
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->vm.clear_range = mock_clear_range;
ggtt->vm.insert_page = mock_insert_page;
ggtt->vm.insert_entries = mock_insert_entries;