Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_client_blt.c           |   89
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c              |  130
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c               |   15
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c               |   80
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c           | 1625
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c                 |   51
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h               |   40
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c           |  152
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_blt.h           |    3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h         |   10
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c                |   30
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.c                   |    2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_throttle.c             |   67
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_tiling.c               |    2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c          |   11
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c |    2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c  |   50
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c    |  146
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c     |    2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c |   75
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c       |   45
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c     |    2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c       |    4
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c         |    7
24 files changed, 1692 insertions, 948 deletions
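
The common thread through the hunks below is the switch from ad-hoc i915_gem_object_lock()/i915_vma_pin() calls to ww-mutex acquire contexts: callers wrap their pinning and request construction in an i915_gem_ww_ctx, and a -EDEADLK from any lock or pin triggers a backoff-and-retry instead of risking a lock inversion. As a rough sketch only (the i915_gem_ww_ctx helpers are the ones introduced by this series; the surrounding function and its name are purely illustrative), a converted caller takes this shape:

	static int do_gpu_op(struct drm_i915_gem_object *obj)
	{
		struct i915_gem_ww_ctx ww;
		int err;

		i915_gem_ww_ctx_init(&ww, true);	/* true = interruptible waits */
	retry:
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			goto out;

		/*
		 * All pinning and request construction happens here, passing the
		 * same &ww so a contended lock returns -EDEADLK instead of
		 * blocking, e.g. i915_vma_pin_ww() or intel_context_pin_ww().
		 */

	out:
		if (err == -EDEADLK) {
			/* drop every held lock, sleep on the contended one, retry */
			err = i915_gem_ww_ctx_backoff(&ww);
			if (!err)
				goto retry;
		}
		i915_gem_ww_ctx_fini(&ww);
		return err;
	}

The per-file diffs below repeat this init/retry/backoff/fini skeleton around the existing pin and request paths.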
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 278664f831e7..272cf3ea68d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
-static int vma_bind(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void vma_bind(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
- return vm->vma_ops.bind_vma(vm, vma, cache_level, flags);
+ vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags);
}
static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
@@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work)
struct clear_pages_work *w = container_of(work, typeof(*w), work);
struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
struct i915_vma *vma = w->sleeve->vma;
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
int err = w->dma.error;
@@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work)
obj->read_domains = I915_GEM_GPU_DOMAINS;
obj->write_domain = 0;
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (unlikely(err))
+ i915_gem_ww_ctx_init(&ww, false);
+ intel_engine_pm_get(w->ce->engine);
+retry:
+ err = intel_context_pin_ww(w->ce, &ww);
+ if (err)
goto out_signal;
- batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+ batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto out_unpin;
+ goto out_ctx;
}
- rq = intel_context_create_request(w->ce);
+ rq = i915_request_create(w->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -224,9 +229,19 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(w->ce, batch);
-out_unpin:
- i915_vma_unpin(vma);
+out_ctx:
+ intel_context_unpin(w->ce);
out_signal:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ i915_vma_unpin(w->sleeve->vma);
+ intel_engine_pm_put(w->ce->engine);
+
if (unlikely(err)) {
dma_fence_set_error(&w->dma, err);
dma_fence_signal(&w->dma);
@@ -234,6 +249,44 @@ out_signal:
}
}
+static int pin_wait_clear_pages_work(struct clear_pages_work *w,
+ struct intel_context *ce)
+{
+ struct i915_vma *vma = w->sleeve->vma;
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(vma->obj, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out;
+
+ err = i915_sw_fence_await_reservation(&w->wait,
+ vma->obj->base.resv, NULL,
+ true, 0, I915_FENCE_GFP);
+ if (err)
+ goto err_unpin_vma;
+
+ dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
+
+err_unpin_vma:
+ if (err)
+ i915_vma_unpin(vma);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ return err;
+}
+
static int __i915_sw_fence_call
clear_pages_work_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
@@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
- i915_gem_object_lock(obj);
- err = i915_sw_fence_await_reservation(&work->wait,
- obj->base.resv, NULL, true, 0,
- I915_FENCE_GFP);
- if (err < 0) {
+ err = pin_wait_clear_pages_work(work, ce);
+ if (err < 0)
dma_fence_set_error(&work->dma, err);
- } else {
- dma_resv_add_excl_fence(obj->base.resv, &work->dma);
- err = 0;
- }
- i915_gem_object_unlock(obj);
dma_fence_get(&work->dma);
i915_sw_fence_commit(&work->wait);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ef755dd5e68f..4fd38101bb56 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -390,24 +390,6 @@ __context_engines_static(const struct i915_gem_context *ctx)
return rcu_dereference_protected(ctx->engines, true);
}
-static bool __reset_engine(struct intel_engine_cs *engine)
-{
- struct intel_gt *gt = engine->gt;
- bool success = false;
-
- if (!intel_has_reset_engine(gt))
- return false;
-
- if (!test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &gt->reset.flags)) {
- success = intel_engine_reset(engine, NULL) == 0;
- clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
- &gt->reset.flags);
- }
-
- return success;
-}
-
static void __reset_context(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
@@ -431,12 +413,7 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
* kill the banned context, we fallback to doing a local reset
* instead.
*/
- if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) &&
- !intel_engine_pulse(engine))
- return true;
-
- /* If we are unable to send a pulse, try resetting this engine. */
- return __reset_engine(engine);
+ return intel_engine_pulse(engine) == 0;
}
static bool
@@ -460,8 +437,8 @@ __active_engine(struct i915_request *rq, struct intel_engine_cs **active)
spin_lock(&locked->active.lock);
}
- if (!i915_request_completed(rq)) {
- if (i915_request_is_active(rq) && rq->fence.error != -EIO)
+ if (i915_request_is_active(rq)) {
+ if (!i915_request_completed(rq))
*active = locked;
ret = true;
}
@@ -479,13 +456,26 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (!ce->timeline)
return NULL;
+ /*
+ * rq->link is only SLAB_TYPESAFE_BY_RCU, we need to hold a reference
+ * to the request to prevent it being transferred to a new timeline
+ * (and onto a new timeline->requests list).
+ */
rcu_read_lock();
- list_for_each_entry_rcu(rq, &ce->timeline->requests, link) {
- if (i915_request_is_active(rq) && i915_request_completed(rq))
- continue;
+ list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
+ bool found;
+
+ /* timeline is already completed upto this point? */
+ if (!i915_request_get_rcu(rq))
+ break;
/* Check with the backend if the request is inflight */
- if (__active_engine(rq, &engine))
+ found = true;
+ if (likely(rcu_access_pointer(rq->timeline) == ce->timeline))
+ found = __active_engine(rq, &engine);
+
+ i915_request_put(rq);
+ if (found)
break;
}
rcu_read_unlock();
@@ -493,7 +483,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
return engine;
}
-static void kill_engines(struct i915_gem_engines *engines)
+static void kill_engines(struct i915_gem_engines *engines, bool ban)
{
struct i915_gem_engines_iter it;
struct intel_context *ce;
@@ -508,7 +498,7 @@ static void kill_engines(struct i915_gem_engines *engines)
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
- if (intel_context_set_banned(ce))
+ if (ban && intel_context_set_banned(ce))
continue;
/*
@@ -521,7 +511,7 @@ static void kill_engines(struct i915_gem_engines *engines)
engine = active_engine(ce);
/* First attempt to gracefully cancel the context */
- if (engine && !__cancel_engine(engine))
+ if (engine && !__cancel_engine(engine) && ban)
/*
* If we are unable to send a preemptive pulse to bump
* the context from the GPU, we have to resort to a full
@@ -531,8 +521,10 @@ static void kill_engines(struct i915_gem_engines *engines)
}
}
-static void kill_stale_engines(struct i915_gem_context *ctx)
+static void kill_context(struct i915_gem_context *ctx)
{
+ bool ban = (!i915_gem_context_is_persistent(ctx) ||
+ !ctx->i915->params.enable_hangcheck);
struct i915_gem_engines *pos, *next;
spin_lock_irq(&ctx->stale.lock);
@@ -545,7 +537,7 @@ static void kill_stale_engines(struct i915_gem_context *ctx)
spin_unlock_irq(&ctx->stale.lock);
- kill_engines(pos);
+ kill_engines(pos, ban);
spin_lock_irq(&ctx->stale.lock);
GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -557,11 +549,6 @@ static void kill_stale_engines(struct i915_gem_context *ctx)
spin_unlock_irq(&ctx->stale.lock);
}
-static void kill_context(struct i915_gem_context *ctx)
-{
- kill_stale_engines(ctx);
-}
-
static void engines_idle_release(struct i915_gem_context *ctx,
struct i915_gem_engines *engines)
{
@@ -596,7 +583,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
kill:
if (list_empty(&engines->link)) /* raced, already closed */
- kill_engines(engines);
+ kill_engines(engines, true);
i915_sw_fence_commit(&engines->fence);
}
@@ -654,9 +641,7 @@ static void context_close(struct i915_gem_context *ctx)
* case we opt to forcibly kill off all remaining requests on
* context close.
*/
- if (!i915_gem_context_is_persistent(ctx) ||
- !ctx->i915->params.enable_hangcheck)
- kill_context(ctx);
+ kill_context(ctx);
i915_gem_context_put(ctx);
}
@@ -892,7 +877,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
struct intel_timeline *timeline;
- timeline = intel_timeline_create(&i915->gt, NULL);
+ timeline = intel_timeline_create(&i915->gt);
if (IS_ERR(timeline)) {
context_close(ctx);
return ERR_CAST(timeline);
@@ -1106,6 +1091,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
bool (*skip)(struct intel_context *ce, void *data),
+ int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
int (*emit)(struct i915_request *rq, void *data),
void (*task)(void *data),
void *data)
@@ -1113,6 +1099,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
+ struct i915_gem_ww_ctx ww;
struct intel_context *ce;
int err = 0;
@@ -1150,10 +1137,21 @@ static int context_barrier_task(struct i915_gem_context *ctx,
if (skip && skip(ce, data))
continue;
- rq = intel_context_create_request(ce);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto err;
+
+ if (pin)
+ err = pin(ce, &ww, data);
+ if (err)
+ goto err_unpin;
+
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- break;
+ goto err_unpin;
}
err = 0;
@@ -1163,6 +1161,16 @@ static int context_barrier_task(struct i915_gem_context *ctx,
err = i915_active_add_request(&cb->base, rq);
i915_request_add(rq);
+err_unpin:
+ intel_context_unpin(ce);
+err:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
if (err)
break;
}
@@ -1218,6 +1226,17 @@ static void set_ppgtt_barrier(void *data)
i915_vm_close(old);
}
+static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
+{
+ struct i915_address_space *vm = ce->vm;
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
+ /* ppGTT is not part of the legacy context image */
+ return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
+
+ return 0;
+}
+
static int emit_ppgtt_update(struct i915_request *rq, void *data)
{
struct i915_address_space *vm = rq->context->vm;
@@ -1274,20 +1293,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
static bool skip_ppgtt_update(struct intel_context *ce, void *data)
{
- if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
- return true;
-
if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
- return false;
-
- if (!atomic_read(&ce->pin_count))
- return true;
-
- /* ppGTT is not part of the legacy context image */
- if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
- return true;
-
- return false;
+ return !ce->state;
+ else
+ return !atomic_read(&ce->pin_count);
}
static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1338,6 +1347,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
*/
err = context_barrier_task(ctx, ALL_ENGINES,
skip_ppgtt_update,
+ pin_ppgtt_update,
emit_ppgtt_update,
set_ppgtt_barrier,
old);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 2679380159fc..8dd295dbe241 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -48,12 +48,9 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
src = sg_next(src);
}
- if (!dma_map_sg_attrs(attachment->dev,
- st->sgl, st->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC)) {
- ret = -ENOMEM;
+ ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ if (ret)
goto err_free_sg;
- }
return st;
@@ -73,9 +70,7 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
- dma_unmap_sg_attrs(attachment->dev,
- sg->sgl, sg->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC);
+ dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(sg);
kfree(sg);
@@ -128,7 +123,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
if (err)
return err;
- err = i915_gem_object_lock_interruptible(obj);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
@@ -149,7 +144,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
if (err)
return err;
- err = i915_gem_object_lock_interruptible(obj);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
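
The dma-buf hunks above move the scatterlist mapping to the sg_table helpers. The substance of the change is the return convention: dma_map_sg_attrs() returns the number of mapped entries (0 on failure), so the caller had to supply -ENOMEM itself, whereas dma_map_sgtable() returns 0 or a negative errno and updates the table's nents on success. A minimal sketch of the two shapes (dev, st and dir stand in for the attachment's device, the sg_table and the DMA direction):

	/* old: nents-or-zero, caller invents the errno */
	if (!dma_map_sg_attrs(dev, st->sgl, st->nents, dir, DMA_ATTR_SKIP_CPU_SYNC))
		return -ENOMEM;

	/* new: 0 or -errno, st->nents is updated by the helper */
	ret = dma_map_sgtable(dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
	if (ret)
		return ret;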
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 7f76fc68f498..7c90a63c273d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
if (!i915_gem_object_is_framebuffer(obj))
return;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
__i915_gem_object_flush_for_display(obj);
i915_gem_object_unlock(obj);
}
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
+{
+ if (i915_gem_object_is_framebuffer(obj))
+ __i915_gem_object_flush_for_display(obj);
+}
+
/**
* Moves a single object to the WC read, and possibly write domain.
* @obj: object to act on
@@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- return ret;
-
/* Always invalidate stale cachelines */
if (obj->cache_level != cache_level) {
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true;
}
- i915_gem_object_unlock(obj);
-
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
I915_GEM_OBJECT_UNBIND_ACTIVE |
@@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
goto out;
}
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (ret)
+ goto out;
+
ret = i915_gem_object_set_cache_level(obj, level);
+ i915_gem_object_unlock(obj);
out:
i915_gem_object_put(obj);
@@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
unsigned int flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_gem_ww_ctx ww;
struct i915_vma *vma;
int ret;
@@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
return ERR_PTR(-EINVAL);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ ret = i915_gem_object_lock(obj, &ww);
+ if (ret)
+ goto err;
/*
* The display engine is not coherent with the LLC cache on gen6. As
* a result, we make sure that the pinning that is about to occur is
@@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
HAS_WT(i915) ?
I915_CACHE_WT : I915_CACHE_NONE);
if (ret)
- return ERR_PTR(ret);
+ goto err;
/*
* As the user may map the buffer once pinned in the display plane
@@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
vma = ERR_PTR(-ENOSPC);
if ((flags & PIN_MAPPABLE) == 0 &&
(!view || view->type == I915_GGTT_VIEW_NORMAL))
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
- flags |
- PIN_MAPPABLE |
- PIN_NONBLOCK);
- if (IS_ERR(vma))
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
- if (IS_ERR(vma))
- return vma;
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
+ flags | PIN_MAPPABLE |
+ PIN_NONBLOCK);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
+ alignment, flags);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err;
+ }
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
- i915_gem_object_flush_if_display(obj);
+ i915_gem_object_flush_if_display_locked(obj);
+
+err:
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ if (ret)
+ return ERR_PTR(ret);
return vma;
}
@@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (err)
goto out;
- err = i915_gem_object_lock_interruptible(obj);
+ err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out_unpin;
@@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- return ret;
+ assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (ret)
- goto err_unlock;
+ return ret;
ret = i915_gem_object_pin_pages(obj);
if (ret)
- goto err_unlock;
+ return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -616,8 +638,6 @@ out:
err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
- i915_gem_object_unlock(obj);
return ret;
}
@@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_lock_interruptible(obj);
- if (ret)
- return ret;
+ assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (ret)
- goto err_unlock;
+ return ret;
ret = i915_gem_object_pin_pages(obj);
if (ret)
- goto err_unlock;
+ return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -680,7 +698,5 @@ out:
err_unpin:
i915_gem_object_unpin_pages(obj);
-err_unlock:
- i915_gem_object_unlock(obj);
return ret;
}
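
With this change i915_gem_object_prepare_read()/prepare_write() no longer take the object lock themselves; they assert_object_held() and leave locking to the caller, so the pread/pwrite-style paths must hold the dma-resv lock across prepare, the copy and finish_access. A sketch of the resulting caller shape, as a fragment only (the copy step is elided and the surrounding declarations are assumed; the function names are the ones used in this patch):

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		return err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		goto out_unlock;

	/* ... write into the object's pages, honouring needs_clflush ... */

	i915_gem_object_finish_access(obj);	/* unpins the pages from prepare_write */
out_unlock:
	i915_gem_object_unlock(obj);
	return err;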
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 446e76e95c38..4b09bcd70cf4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -26,6 +26,7 @@
#include "i915_gem_ioctls.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
+#include "i915_user_extensions.h"
struct eb_vma {
struct i915_vma *vma;
@@ -40,11 +41,6 @@ struct eb_vma {
u32 handle;
};
-struct eb_vma_array {
- struct kref kref;
- struct eb_vma vma[];
-};
-
enum {
FORCE_CPU_RELOC = 1,
FORCE_GTT_RELOC,
@@ -57,9 +53,11 @@ enum {
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
+#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
-#define __EXEC_INTERNAL_FLAGS (~0u << 31)
+#define __EXEC_ENGINE_PINNED BIT(30)
+#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
@@ -229,6 +227,13 @@ enum {
* the batchbuffer in trusted mode, otherwise the ioctl is rejected.
*/
+struct eb_fence {
+ struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+ struct dma_fence *dma_fence;
+ u64 value;
+ struct dma_fence_chain *chain_fence;
+};
+
struct i915_execbuffer {
struct drm_i915_private *i915; /** i915 backpointer */
struct drm_file *file; /** per-file lookup tables and limits */
@@ -253,6 +258,8 @@ struct i915_execbuffer {
/** list of vma that have execobj.relocation_count */
struct list_head relocs;
+ struct i915_gem_ww_ctx ww;
+
/**
* Track the most recently used object for relocations, as we
* frequently have to perform multiple relocations within the same
@@ -268,19 +275,22 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
- struct i915_vma *target;
struct i915_request *rq;
- struct i915_vma *rq_vma;
u32 *rq_cmd;
unsigned int rq_size;
+ struct intel_gt_buffer_pool_node *pool;
} reloc_cache;
+ struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+ struct intel_context *reloc_context;
+
u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */
u32 batch_start_offset; /** Location within object of batch */
u32 batch_len; /** Length of batch within object */
u32 batch_flags; /** Flags composed for emit_bb_start() */
+ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
/**
* Indicate either the size of the hastable used to resolve
@@ -289,9 +299,16 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
- struct eb_vma_array *array;
+
+ struct eb_fence *fences;
+ unsigned long num_fences;
};
+static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+ bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
+
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -299,62 +316,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
eb->args->batch_len);
}
-static struct eb_vma_array *eb_vma_array_create(unsigned int count)
-{
- struct eb_vma_array *arr;
-
- arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
- if (!arr)
- return NULL;
-
- kref_init(&arr->kref);
- arr->vma[0].vma = NULL;
-
- return arr;
-}
-
-static inline void eb_unreserve_vma(struct eb_vma *ev)
-{
- struct i915_vma *vma = ev->vma;
-
- if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
- __i915_vma_unpin_fence(vma);
-
- if (ev->flags & __EXEC_OBJECT_HAS_PIN)
- __i915_vma_unpin(vma);
-
- ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
- __EXEC_OBJECT_HAS_FENCE);
-}
-
-static void eb_vma_array_destroy(struct kref *kref)
-{
- struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
- struct eb_vma *ev = arr->vma;
-
- while (ev->vma) {
- eb_unreserve_vma(ev);
- i915_vma_put(ev->vma);
- ev++;
- }
-
- kvfree(arr);
-}
-
-static void eb_vma_array_put(struct eb_vma_array *arr)
-{
- kref_put(&arr->kref, eb_vma_array_destroy);
-}
-
static int eb_create(struct i915_execbuffer *eb)
{
- /* Allocate an extra slot for use by the command parser + sentinel */
- eb->array = eb_vma_array_create(eb->buffer_count + 2);
- if (!eb->array)
- return -ENOMEM;
-
- eb->vma = eb->array->vma;
-
if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
unsigned int size = 1 + ilog2(eb->buffer_count);
@@ -388,10 +351,8 @@ static int eb_create(struct i915_execbuffer *eb)
break;
} while (--size);
- if (unlikely(!size)) {
- eb_vma_array_put(eb->array);
+ if (unlikely(!size))
return -ENOMEM;
- }
eb->lut_size = size;
} else {
@@ -475,16 +436,17 @@ eb_pin_vma(struct i915_execbuffer *eb,
pin_flags |= PIN_GLOBAL;
/* Attempt to reuse the current location if available */
- if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+ /* TODO: Add -EDEADLK handling here */
+ if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
if (entry->flags & EXEC_OBJECT_PINNED)
return false;
/* Failing that pick any _free_ space if suitable */
- if (unlikely(i915_vma_pin(vma,
- entry->pad_to_size,
- entry->alignment,
- eb_pin_flags(entry, ev->flags) |
- PIN_USER | PIN_NOEVICT)))
+ if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT)))
return false;
}
@@ -502,6 +464,19 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
}
+static inline void
+eb_unreserve_vma(struct eb_vma *ev)
+{
+ if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
+ return;
+
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(ev->vma);
+
+ __i915_vma_unpin(ev->vma);
+ ev->flags &= ~__EXEC_OBJECT_RESERVED;
+}
+
static int
eb_validate_vma(struct i915_execbuffer *eb,
struct drm_i915_gem_exec_object2 *entry,
@@ -593,16 +568,6 @@ eb_add_vma(struct i915_execbuffer *eb,
eb->batch = ev;
}
-
- if (eb_pin_vma(eb, entry, ev)) {
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start | UPDATE;
- eb->args->flags |= __EXEC_HAS_RELOC;
- }
- } else {
- eb_unreserve_vma(ev);
- list_add_tail(&ev->bind_link, &eb->unbound);
- }
}
static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -622,7 +587,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
obj->cache_level != I915_CACHE_NONE);
}
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
+static int eb_reserve_vma(struct i915_execbuffer *eb,
struct eb_vma *ev,
u64 pin_flags)
{
@@ -637,7 +602,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return err;
}
- err = i915_vma_pin(vma,
+ err = i915_vma_pin_ww(vma, &eb->ww,
entry->pad_to_size, entry->alignment,
eb_pin_flags(entry, ev->flags) | pin_flags);
if (err)
@@ -687,10 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
* This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*/
-
- if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
- return -EINTR;
-
pass = 0;
do {
list_for_each_entry(ev, &eb->unbound, bind_link) {
@@ -698,8 +659,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
if (err)
break;
}
- if (!(err == -ENOSPC || err == -EAGAIN))
- break;
+ if (err != -ENOSPC)
+ return err;
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
@@ -729,13 +690,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
}
list_splice_tail(&last, &eb->unbound);
- if (err == -EAGAIN) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
- flush_workqueue(eb->i915->mm.userptr_wq);
- mutex_lock(&eb->i915->drm.struct_mutex);
- continue;
- }
-
switch (pass++) {
case 0:
break;
@@ -746,20 +700,15 @@ static int eb_reserve(struct i915_execbuffer *eb)
err = i915_gem_evict_vm(eb->context->vm);
mutex_unlock(&eb->context->vm->mutex);
if (err)
- goto unlock;
+ return err;
break;
default:
- err = -ENOSPC;
- goto unlock;
+ return -ENOSPC;
}
pin_flags = PIN_USER;
} while (1);
-
-unlock:
- mutex_unlock(&eb->i915->drm.struct_mutex);
- return err;
}
static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
@@ -882,12 +831,12 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
+ struct drm_i915_private *i915 = eb->i915;
unsigned int batch = eb_batch_index(eb);
unsigned int i;
int err = 0;
INIT_LIST_HEAD(&eb->relocs);
- INIT_LIST_HEAD(&eb->unbound);
for (i = 0; i < eb->buffer_count; i++) {
struct i915_vma *vma;
@@ -895,22 +844,83 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
vma = eb_lookup_vma(eb, eb->exec[i].handle);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- break;
+ goto err;
}
err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err)) {
i915_vma_put(vma);
- break;
+ goto err;
}
eb_add_vma(eb, i, batch, vma);
}
+ if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
+ drm_dbg(&i915->drm,
+ "Attempting to use self-modifying batch buffer\n");
+ return -EINVAL;
+ }
+
+ if (range_overflows_t(u64,
+ eb->batch_start_offset, eb->batch_len,
+ eb->batch->vma->size)) {
+ drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+ return -EINVAL;
+ }
+
+ if (eb->batch_len == 0)
+ eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+
+ return 0;
+
+err:
eb->vma[i].vma = NULL;
return err;
}
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ INIT_LIST_HEAD(&eb->unbound);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ err = i915_gem_object_lock(vma->obj, &eb->ww);
+ if (err)
+ return err;
+
+ if (eb_pin_vma(eb, entry, ev)) {
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+ } else {
+ eb_unreserve_vma(ev);
+
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ if (drm_mm_node_allocated(&vma->node)) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+ }
+
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
+ }
+
+ if (!list_empty(&eb->unbound))
+ return eb_reserve(eb);
+
+ return 0;
+}
+
static struct eb_vma *
eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
{
@@ -931,13 +941,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ if (!vma)
+ break;
+
+ eb_unreserve_vma(ev);
+
+ if (final)
+ i915_vma_put(vma);
+ }
+
+ eb_unpin_engine(eb);
+}
+
static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
- if (eb->array)
- eb_vma_array_put(eb->array);
-
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -949,6 +977,14 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
return gen8_canonical_addr((int)reloc->delta + target->node.start);
}
+static void reloc_cache_clear(struct reloc_cache *cache)
+{
+ cache->rq = NULL;
+ cache->rq_cmd = NULL;
+ cache->pool = NULL;
+ cache->rq_size = 0;
+}
+
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
@@ -961,8 +997,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- cache->rq = NULL;
- cache->target = NULL;
+ reloc_cache_clear(cache);
}
static inline void *unmask_page(unsigned long p)
@@ -984,132 +1019,60 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-#define RELOC_TAIL 4
-
-static int reloc_gpu_chain(struct reloc_cache *cache)
+static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
- struct intel_gt_buffer_pool_node *pool;
- struct i915_request *rq = cache->rq;
- struct i915_vma *batch;
- u32 *cmd;
- int err;
-
- pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
-
- batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_pool;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
- if (err)
- goto out_pool;
-
- GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
- cmd = cache->rq_cmd + cache->rq_size;
- *cmd++ = MI_ARB_CHECK;
- if (cache->gen >= 8)
- *cmd++ = MI_BATCH_BUFFER_START_GEN8;
- else if (cache->gen >= 6)
- *cmd++ = MI_BATCH_BUFFER_START;
- else
- *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
- *cmd++ = lower_32_bits(batch->node.start);
- *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
- i915_gem_object_flush_map(cache->rq_vma->obj);
- i915_gem_object_unpin_map(cache->rq_vma->obj);
- cache->rq_vma = NULL;
-
- err = intel_gt_buffer_pool_mark_active(pool, rq);
- if (err == 0) {
- i915_vma_lock(batch);
- err = i915_request_await_object(rq, batch->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
- }
- i915_vma_unpin(batch);
- if (err)
- goto out_pool;
-
- cmd = i915_gem_object_pin_map(batch->obj,
- cache->has_llc ?
- I915_MAP_FORCE_WB :
- I915_MAP_FORCE_WC);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto out_pool;
- }
-
- /* Return with batch mapping (cmd) still pinned */
- cache->rq_cmd = cmd;
- cache->rq_size = 0;
- cache->rq_vma = batch;
-
-out_pool:
- intel_gt_buffer_pool_put(pool);
- return err;
-}
+ if (!cache->pool)
+ return;
-static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
-{
- return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+ /*
+ * This is a bit nasty, normally we keep objects locked until the end
+ * of execbuffer, but we already submit this, and have to unlock before
+ * dropping the reference. Fortunately we can only hold 1 pool node at
+ * a time, so this should be harmless.
+ */
+ i915_gem_ww_unlock_single(cache->pool->obj);
+ intel_gt_buffer_pool_put(cache->pool);
+ cache->pool = NULL;
}
-static int reloc_gpu_flush(struct reloc_cache *cache)
+static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
- struct i915_request *rq;
- int err;
+ struct drm_i915_gem_object *obj = cache->rq->batch->obj;
- rq = fetch_and_zero(&cache->rq);
- if (!rq)
- return 0;
+ GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+ cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
- if (cache->rq_vma) {
- struct drm_i915_gem_object *obj = cache->rq_vma->obj;
+ __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+ i915_gem_object_unpin_map(obj);
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
+ intel_gt_chipset_flush(cache->rq->engine->gt);
- __i915_gem_object_flush_map(obj,
- 0, sizeof(u32) * cache->rq_size);
- i915_gem_object_unpin_map(obj);
- }
+ i915_request_add(cache->rq);
+ reloc_cache_put_pool(eb, cache);
+ reloc_cache_clear(cache);
- err = 0;
- if (rq->engine->emit_init_breadcrumb)
- err = rq->engine->emit_init_breadcrumb(rq);
- if (!err)
- err = rq->engine->emit_bb_start(rq,
- rq->batch->node.start,
- PAGE_SIZE,
- reloc_bb_flags(cache));
- if (err)
- i915_request_set_error_once(rq, err);
-
- intel_gt_chipset_flush(rq->engine->gt);
- i915_request_add(rq);
-
- return err;
+ eb->reloc_pool = NULL;
}
-static void reloc_cache_reset(struct reloc_cache *cache)
+static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
+ if (cache->rq)
+ reloc_gpu_flush(eb, cache);
+
if (!cache->vaddr)
return;
vaddr = unmask_page(cache->vaddr);
if (cache->vaddr & KMAP) {
+ struct drm_i915_gem_object *obj =
+ (struct drm_i915_gem_object *)cache->node.mm;
if (cache->vaddr & CLFLUSH_AFTER)
mb();
kunmap_atomic(vaddr);
- i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+ i915_gem_object_finish_access(obj);
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1134,9 +1097,10 @@ static void reloc_cache_reset(struct reloc_cache *cache)
static void *reloc_kmap(struct drm_i915_gem_object *obj,
struct reloc_cache *cache,
- unsigned long page)
+ unsigned long pageno)
{
void *vaddr;
+ struct page *page;
if (cache->vaddr) {
kunmap_atomic(unmask_page(cache->vaddr));
@@ -1157,17 +1121,22 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
mb();
}
- vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+ page = i915_gem_object_get_page(obj, pageno);
+ if (!obj->mm.dirty)
+ set_page_dirty(page);
+
+ vaddr = kmap_atomic(page);
cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
- cache->page = page;
+ cache->page = pageno;
return vaddr;
}
static void *reloc_iomap(struct drm_i915_gem_object *obj,
- struct reloc_cache *cache,
+ struct i915_execbuffer *eb,
unsigned long page)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
void *vaddr;
@@ -1185,16 +1154,17 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (use_cpu_reloc(cache, obj))
return NULL;
- i915_gem_object_lock(obj);
err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
if (err)
return ERR_PTR(err);
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (vma == ERR_PTR(-EDEADLK))
+ return vma;
+
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
mutex_lock(&ggtt->vm.mutex);
@@ -1230,9 +1200,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
}
static void *reloc_vaddr(struct drm_i915_gem_object *obj,
- struct reloc_cache *cache,
+ struct i915_execbuffer *eb,
unsigned long page)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
void *vaddr;
if (cache->page == page) {
@@ -1240,7 +1211,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
- vaddr = reloc_iomap(obj, cache, page);
+ vaddr = reloc_iomap(obj, eb, page);
if (!vaddr)
vaddr = reloc_kmap(obj, cache, page);
}
@@ -1276,7 +1247,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
struct drm_i915_gem_object *obj = vma->obj;
int err;
- i915_vma_lock(vma);
+ assert_vma_held(vma);
if (obj->cache_dirty & ~obj->cache_coherent)
i915_gem_clflush_object(obj, 0);
@@ -1286,25 +1257,31 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
-
return err;
}
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
struct intel_engine_cs *engine,
+ struct i915_vma *vma,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_gt_buffer_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ }
+ eb->reloc_pool = NULL;
+
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ goto err_pool;
cmd = i915_gem_object_pin_map(pool->obj,
cache->has_llc ?
@@ -1312,35 +1289,42 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
I915_MAP_FORCE_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_pool;
+ goto err_pool;
}
- batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
+ batch = i915_vma_instance(pool->obj, vma->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
}
- err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+ err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
if (err)
goto err_unmap;
if (engine == eb->context->engine) {
rq = i915_request_create(eb->context);
} else {
- struct intel_context *ce;
+ struct intel_context *ce = eb->reloc_context;
- ce = intel_context_create(engine);
- if (IS_ERR(ce)) {
- err = PTR_ERR(ce);
- goto err_unpin;
+ if (!ce) {
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err_unpin;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(eb->context->vm);
+ eb->reloc_context = ce;
}
- i915_vm_put(ce->vm);
- ce->vm = i915_vm_get(eb->context->vm);
+ err = intel_context_pin_ww(ce, &eb->ww);
+ if (err)
+ goto err_unpin;
- rq = intel_context_create_request(ce);
- intel_context_put(ce);
+ rq = i915_request_create(ce);
+ intel_context_unpin(ce);
}
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
@@ -1351,11 +1335,20 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- i915_vma_lock(batch);
+ err = reloc_move_to_gpu(rq, vma);
+ if (err)
+ goto err_request;
+
+ err = eb->engine->emit_bb_start(rq,
+ batch->node.start, PAGE_SIZE,
+ cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+ if (err)
+ goto skip_request;
+
+ assert_vma_held(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
if (err)
goto skip_request;
@@ -1365,10 +1358,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq = rq;
cache->rq_cmd = cmd;
cache->rq_size = 0;
- cache->rq_vma = batch;
+ cache->pool = pool;
/* Return with batch mapping (cmd) still pinned */
- goto out_pool;
+ return 0;
skip_request:
i915_request_set_error_once(rq, err);
@@ -1378,8 +1371,8 @@ err_unpin:
i915_vma_unpin(batch);
err_unmap:
i915_gem_object_unpin_map(pool->obj);
-out_pool:
- intel_gt_buffer_pool_put(pool);
+err_pool:
+ eb->reloc_pool = pool;
return err;
}
@@ -1394,9 +1387,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
- int err;
+
+ if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+ reloc_gpu_flush(eb, cache);
if (unlikely(!cache->rq)) {
+ int err;
struct intel_engine_cs *engine = eb->engine;
if (!reloc_can_use_engine(engine)) {
@@ -1405,31 +1401,11 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
return ERR_PTR(-ENODEV);
}
- err = __reloc_gpu_alloc(eb, engine, len);
+ err = __reloc_gpu_alloc(eb, engine, vma, len);
if (unlikely(err))
return ERR_PTR(err);
}
- if (vma != cache->target) {
- err = reloc_move_to_gpu(cache->rq, vma);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
- return ERR_PTR(err);
- }
-
- cache->target = vma;
- }
-
- if (unlikely(cache->rq_size + len >
- PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
- err = reloc_gpu_chain(cache);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
- return ERR_PTR(err);
- }
- }
-
- GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
cmd = cache->rq_cmd + cache->rq_size;
cache->rq_size += len;
@@ -1461,7 +1437,7 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
return addr + offset_in_page(offset);
}
-static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+static int __reloc_entry_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
u64 offset,
u64 target_addr)
@@ -1479,7 +1455,9 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
len = 3;
batch = reloc_gpu(eb, vma, len);
- if (IS_ERR(batch))
+ if (batch == ERR_PTR(-EDEADLK))
+ return -EDEADLK;
+ else if (IS_ERR(batch))
return false;
addr = gen8_canonical_addr(vma->node.start + offset);
@@ -1532,7 +1510,7 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
return true;
}
-static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+static int reloc_entry_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
u64 offset,
u64 target_addr)
@@ -1554,14 +1532,17 @@ relocate_entry(struct i915_vma *vma,
{
u64 target_addr = relocation_target(reloc, target);
u64 offset = reloc->offset;
+ int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
- if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ if (reloc_gpu < 0)
+ return reloc_gpu;
+
+ if (!reloc_gpu) {
bool wide = eb->reloc_cache.use_64bit_reloc;
void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj,
- &eb->reloc_cache,
+ vaddr = reloc_vaddr(vma->obj, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1712,7 +1693,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* we would try to acquire the struct mutex again. Obviously
* this is bad and so lockdep complains vehemently.
*/
- copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
+ pagefault_disable();
+ copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+ pagefault_enable();
if (unlikely(copied)) {
remain = -EFAULT;
goto out;
@@ -1756,74 +1739,400 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
urelocs += ARRAY_SIZE(stack);
} while (remain);
out:
- reloc_cache_reset(&eb->reloc_cache);
+ reloc_cache_reset(&eb->reloc_cache, eb);
return remain;
}
-static int eb_relocate(struct i915_execbuffer *eb)
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
{
+ const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+ struct drm_i915_gem_relocation_entry *relocs =
+ u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ unsigned int i;
int err;
- err = eb_lookup_vmas(eb);
- if (err)
- return err;
+ for (i = 0; i < entry->relocation_count; i++) {
+ u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
+
+ if ((s64)offset < 0) {
+ err = (int)offset;
+ goto err;
+ }
+ }
+ err = 0;
+err:
+ reloc_cache_reset(&eb->reloc_cache, eb);
+ return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+ const char __user *addr, *end;
+ unsigned long size;
+ char __maybe_unused c;
+
+ size = entry->relocation_count;
+ if (size == 0)
+ return 0;
- if (!list_empty(&eb->unbound)) {
- err = eb_reserve(eb);
+ if (size > N_RELOC(ULONG_MAX))
+ return -EINVAL;
+
+ addr = u64_to_user_ptr(entry->relocs_ptr);
+ size *= sizeof(struct drm_i915_gem_relocation_entry);
+ if (!access_ok(addr, size))
+ return -EFAULT;
+
+ end = addr + size;
+ for (; addr < end; addr += PAGE_SIZE) {
+ int err = __get_user(c, addr);
if (err)
return err;
}
+ return __get_user(c, end - 1);
+}
- /* The objects are in their final locations, apply the relocations. */
- if (eb->args->flags & __EXEC_HAS_RELOC) {
- struct eb_vma *ev;
- int flush;
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+ struct drm_i915_gem_relocation_entry *relocs;
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ int err;
- list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ for (i = 0; i < count; i++) {
+ const unsigned int nreloc = eb->exec[i].relocation_count;
+ struct drm_i915_gem_relocation_entry __user *urelocs;
+ unsigned long size;
+ unsigned long copied;
+
+ if (nreloc == 0)
+ continue;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ goto err;
+
+ urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+ size = nreloc * sizeof(*relocs);
+
+ relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+ if (!relocs) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ /* copy_from_user is limited to < 4GiB */
+ copied = 0;
+ do {
+ unsigned int len =
+ min_t(u64, BIT_ULL(31), size - copied);
+
+ if (__copy_from_user((char *)relocs + copied,
+ (char __user *)urelocs + copied,
+ len))
+ goto end;
+
+ copied += len;
+ } while (copied < size);
+
+ /*
+ * As we do not update the known relocation offsets after
+ * relocating (due to the complexities in lock handling),
+ * we need to mark them as invalid now so that we force the
+ * relocation processing next time. Just in case the target
+ * object is evicted and then rebound into its old
+ * presumed_offset before the next execbuffer - if that
+ * happened we would make the mistake of assuming that the
+ * relocations were valid.
+ */
+ if (!user_access_begin(urelocs, size))
+ goto end;
+
+ for (copied = 0; copied < nreloc; copied++)
+ unsafe_put_user(-1,
+ &urelocs[copied].presumed_offset,
+ end_user);
+ user_access_end();
+
+ eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+ }
+
+ return 0;
+
+end_user:
+ user_access_end();
+end:
+ kvfree(relocs);
+ err = -EFAULT;
+err:
+ while (i--) {
+ relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+ if (eb->exec[i].relocation_count)
+ kvfree(relocs);
+ }
+ return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ int err;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+ struct i915_request *rq)
+{
+ bool have_copy = false;
+ struct eb_vma *ev;
+ int err = 0;
+
+repeat:
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* We may process another execbuffer during the unlock... */
+ eb_release_vmas(eb, false);
+ i915_gem_ww_ctx_fini(&eb->ww);
+
+ if (rq) {
+ /* nonblocking is always false */
+ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ i915_request_put(rq);
+ rq = NULL;
+
+ err = -EINTR;
+ goto err_relock;
+ }
+
+ i915_request_put(rq);
+ rq = NULL;
+ }
+
+ /*
+ * We take 3 passes through the slowpatch.
+ *
+ * 1 - we try to just prefault all the user relocation entries and
+ * then attempt to reuse the atomic pagefault disabled fast path again.
+ *
+ * 2 - we copy the user entries to a local buffer here outside of the
+ * local and allow ourselves to wait upon any rendering before
+ * relocations
+ *
+ * 3 - we already have a local copy of the relocation entries, but
+ * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+ */
+ if (!err) {
+ err = eb_prefault_relocations(eb);
+ } else if (!have_copy) {
+ err = eb_copy_relocations(eb);
+ have_copy = err == 0;
+ } else {
+ cond_resched();
+ err = 0;
+ }
+
+ if (!err)
+ flush_workqueue(eb->i915->mm.userptr_wq);
+
+err_relock:
+ i915_gem_ww_ctx_init(&eb->ww, true);
+ if (err)
+ goto out;
+
+ /* reacquire the objects */
+repeat_validate:
+ rq = eb_pin_engine(eb, false);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ rq = NULL;
+ goto err;
+ }
+
+ /* We didn't throttle, should be NULL */
+ GEM_WARN_ON(rq);
+
+ err = eb_validate_vmas(eb);
+ if (err)
+ goto err;
+
+ GEM_BUG_ON(!eb->batch);
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ if (!have_copy) {
+ pagefault_disable();
err = eb_relocate_vma(eb, ev);
+ pagefault_enable();
+ if (err)
+ break;
+ } else {
+ err = eb_relocate_vma_slow(eb, ev);
if (err)
break;
}
+ }
+
+ if (err == -EDEADLK)
+ goto err;
- flush = reloc_gpu_flush(&eb->reloc_cache);
+ if (err && !have_copy)
+ goto repeat;
+
+ if (err)
+ goto err;
+
+ /* as last step, parse the command buffer */
+ err = eb_parse(eb);
+ if (err)
+ goto err;
+
+ /*
+ * Leave the user relocations as are, this is the painfully slow path,
+ * and we want to avoid the complication of dropping the lock whilst
+ * having buffers reserved in the aperture and so causing spurious
+ * ENOSPC for random operations.
+ */
+
+err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
- err = flush;
+ goto repeat_validate;
+ }
+
+ if (err == -EAGAIN)
+ goto repeat;
+
+out:
+ if (have_copy) {
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ const struct drm_i915_gem_exec_object2 *entry =
+ &eb->exec[i];
+ struct drm_i915_gem_relocation_entry *relocs;
+
+ if (!entry->relocation_count)
+ continue;
+
+ relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ kvfree(relocs);
+ }
}
+ if (rq)
+ i915_request_put(rq);
+
return err;
}
-static int eb_move_to_gpu(struct i915_execbuffer *eb)
+static int eb_relocate_parse(struct i915_execbuffer *eb)
{
- const unsigned int count = eb->buffer_count;
- struct ww_acquire_ctx acquire;
- unsigned int i;
- int err = 0;
+ int err;
+ struct i915_request *rq = NULL;
+ bool throttle = true;
- ww_acquire_init(&acquire, &reservation_ww_class);
+retry:
+ rq = eb_pin_engine(eb, throttle);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ rq = NULL;
+ if (err != -EDEADLK)
+ return err;
- for (i = 0; i < count; i++) {
- struct eb_vma *ev = &eb->vma[i];
- struct i915_vma *vma = ev->vma;
+ goto err;
+ }
- err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
- if (err == -EDEADLK) {
- GEM_BUG_ON(i == 0);
- do {
- int j = i - 1;
+ if (rq) {
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
- ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
+ /* Need to drop all locks now for throttling, take slowpath */
+ err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+ if (err == -ETIME) {
+ if (nonblock) {
+ err = -EWOULDBLOCK;
+ i915_request_put(rq);
+ goto err;
+ }
+ goto slow;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
- swap(eb->vma[i], eb->vma[j]);
- } while (--i);
+ /* only throttle once, even if we didn't need to throttle */
+ throttle = false;
- err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
- &acquire);
+ err = eb_validate_vmas(eb);
+ if (err == -EAGAIN)
+ goto slow;
+ else if (err)
+ goto err;
+
+ /* The objects are in their final locations, apply the relocations. */
+ if (eb->args->flags & __EXEC_HAS_RELOC) {
+ struct eb_vma *ev;
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ err = eb_relocate_vma(eb, ev);
+ if (err)
+ break;
}
- if (err)
- break;
+
+ if (err == -EDEADLK)
+ goto err;
+ else if (err)
+ goto slow;
+ }
+
+ if (!err)
+ err = eb_parse(eb);
+
+err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
+ if (!err)
+ goto retry;
}
- ww_acquire_done(&acquire);
+
+ return err;
+
+slow:
+ err = eb_relocate_parse_slow(eb, rq);
+ if (err)
+ /*
+ * If the user expects the execobject.offset and
+ * reloc.presumed_offset to be an exact match,
+ * as for using NO_RELOC, then we cannot update
+ * the execobject.offset until we have completed
+ * relocation.
+ */
+ eb->args->flags &= ~__EXEC_HAS_RELOC;
+
+ return err;
+}
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i = count;
+ int err = 0;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1868,13 +2177,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
if (err == 0)
err = i915_vma_move_to_active(vma, eb->request, flags);
-
- i915_vma_unlock(vma);
- eb_unreserve_vma(ev);
}
- ww_acquire_fini(&acquire);
-
- eb_vma_array_put(fetch_and_zero(&eb->array));
if (unlikely(err))
goto err_skip;
@@ -1894,7 +2197,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return -EINVAL;
/* Kernel clipping was a DRI1 misfeature */
- if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+ if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+ I915_EXEC_USE_EXTENSIONS))) {
if (exec->num_cliprects || exec->cliprects_ptr)
return -EINVAL;
}
@@ -1938,7 +2242,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
}
static struct i915_vma *
-shadow_batch_pin(struct drm_i915_gem_object *obj,
+shadow_batch_pin(struct i915_execbuffer *eb,
+ struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
unsigned int flags)
{
@@ -1949,7 +2254,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return vma;
- err = i915_vma_pin(vma, 0, 0, flags);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
if (err)
return ERR_PTR(err);
@@ -1962,8 +2267,8 @@ struct eb_parse_work {
struct i915_vma *batch;
struct i915_vma *shadow;
struct i915_vma *trampoline;
- unsigned int batch_offset;
- unsigned int batch_length;
+ unsigned long batch_offset;
+ unsigned long batch_length;
};
static int __eb_parse(struct dma_fence_work *work)
@@ -2001,7 +2306,7 @@ __parser_mark_active(struct i915_vma *vma,
{
struct intel_gt_buffer_pool_node *node = vma->private;
- return i915_active_ref(&node->active, tl, fence);
+ return i915_active_ref(&node->active, tl->fence_context, fence);
}
static int
@@ -2033,6 +2338,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
struct eb_parse_work *pw;
int err;
+ GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
+ GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
+
pw = kzalloc(sizeof(*pw), GFP_KERNEL);
if (!pw)
return -ENOMEM;
@@ -2065,36 +2373,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
if (err)
goto err_commit;
- err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
- if (err)
- goto err_commit;
-
err = dma_resv_reserve_shared(pw->batch->resv, 1);
if (err)
- goto err_commit_unlock;
+ goto err_commit;
/* Wait for all writes (and relocs) into the batch to complete */
err = i915_sw_fence_await_reservation(&pw->base.chain,
pw->batch->resv, NULL, false,
0, I915_FENCE_GFP);
if (err < 0)
- goto err_commit_unlock;
+ goto err_commit;
/* Keep the batch alive and unwritten as we parse */
dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
- dma_resv_unlock(pw->batch->resv);
-
/* Force execution to wait for completion of the parser */
- dma_resv_lock(shadow->resv, NULL);
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
- dma_resv_unlock(shadow->resv);
dma_fence_work_commit_imm(&pw->base);
return 0;
-err_commit_unlock:
- dma_resv_unlock(pw->batch->resv);
err_commit:
i915_sw_fence_set_error_once(&pw->base.chain, err);
dma_fence_work_commit_imm(&pw->base);
@@ -2109,16 +2407,33 @@ err_free:
return err;
}
+static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+ /*
+ * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+ * batch" bit. Hence we need to pin secure batches into the global gtt.
+ * hsw should have this fixed, but bdw mucks it up again.
+ */
+ if (eb->batch_flags & I915_DISPATCH_SECURE)
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+
+ return NULL;
+}
+
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
- struct intel_gt_buffer_pool_node *pool;
- struct i915_vma *shadow, *trampoline;
+ struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
+ struct i915_vma *shadow, *trampoline, *batch;
unsigned int len;
int err;
- if (!eb_use_cmdparser(eb))
- return 0;
+ if (!eb_use_cmdparser(eb)) {
+ batch = eb_dispatch_secure(eb, eb->batch->vma);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ goto secure_batch;
+ }
len = eb->batch_len;
if (!CMDPARSER_USES_GGTT(eb->i915)) {
@@ -2135,11 +2450,18 @@ static int eb_parse(struct i915_execbuffer *eb)
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ eb->batch_pool = pool;
+ }
+
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ goto err;
- shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+ shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
if (IS_ERR(shadow)) {
err = PTR_ERR(shadow);
goto err;
@@ -2151,7 +2473,7 @@ static int eb_parse(struct i915_execbuffer *eb)
if (CMDPARSER_USES_GGTT(eb->i915)) {
trampoline = shadow;
- shadow = shadow_batch_pin(pool->obj,
+ shadow = shadow_batch_pin(eb, pool->obj,
&eb->engine->gt->ggtt->vm,
PIN_GLOBAL);
if (IS_ERR(shadow)) {
@@ -2164,42 +2486,43 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_flags |= I915_DISPATCH_SECURE;
}
+ batch = eb_dispatch_secure(eb, shadow);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err_trampoline;
+ }
+
err = eb_parse_pipeline(eb, shadow, trampoline);
if (err)
- goto err_trampoline;
+ goto err_unpin_batch;
- eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
- eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
- eb->vma[eb->buffer_count].vma = NULL;
+ eb->batch->vma = i915_vma_get(shadow);
+ eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
+secure_batch:
+ if (batch) {
+ eb->batch = &eb->vma[eb->buffer_count++];
+ eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+ eb->batch->vma = i915_vma_get(batch);
+ }
return 0;
+err_unpin_batch:
+ if (batch)
+ i915_vma_unpin(batch);
err_trampoline:
if (trampoline)
i915_vma_unpin(trampoline);
err_shadow:
i915_vma_unpin(shadow);
err:
- intel_gt_buffer_pool_put(pool);
return err;
}
-static void
-add_to_client(struct i915_request *rq, struct drm_file *file)
-{
- struct drm_i915_file_private *file_priv = file->driver_priv;
-
- rq->file_priv = file_priv;
-
- spin_lock(&file_priv->mm.lock);
- list_add_tail(&rq->client_link, &file_priv->mm.request_list);
- spin_unlock(&file_priv->mm.lock);
-}
-
static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
{
int err;
@@ -2281,7 +2604,7 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX] = VECS0
};
-static struct i915_request *eb_throttle(struct intel_context *ce)
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
{
struct intel_ring *ring = ce->ring;
struct intel_timeline *tl = ce->timeline;
@@ -2315,31 +2638,26 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
return i915_request_get(rq);
}
-static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
{
+ struct intel_context *ce = eb->context;
struct intel_timeline *tl;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
int err;
- /*
- * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
- * EIO if the GPU is already wedged.
- */
- err = intel_gt_terminally_wedged(ce->engine->gt);
- if (err)
- return err;
+ GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
if (unlikely(intel_context_is_banned(ce)))
- return -EIO;
+ return ERR_PTR(-EIO);
/*
* Pinning the contexts may generate requests in order to acquire
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- err = intel_context_pin(ce);
+ err = intel_context_pin_ww(ce, &eb->ww);
if (err)
- return err;
+ return ERR_PTR(err);
/*
* Take a local wakeref for preparing to dispatch the execbuf as
@@ -2351,45 +2669,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
*/
tl = intel_context_timeline_lock(ce);
if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto err_unpin;
+ intel_context_unpin(ce);
+ return ERR_CAST(tl);
}
intel_context_enter(ce);
- rq = eb_throttle(ce);
-
+ if (throttle)
+ rq = eb_throttle(eb, ce);
intel_context_timeline_unlock(tl);
- if (rq) {
- bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
- long timeout;
-
- timeout = MAX_SCHEDULE_TIMEOUT;
- if (nonblock)
- timeout = 0;
-
- timeout = i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- timeout);
- i915_request_put(rq);
-
- if (timeout < 0) {
- err = nonblock ? -EWOULDBLOCK : timeout;
- goto err_exit;
- }
- }
-
- eb->engine = ce->engine;
- eb->context = ce;
- return 0;
-
-err_exit:
- mutex_lock(&tl->mutex);
- intel_context_exit(ce);
- intel_context_timeline_unlock(tl);
-err_unpin:
- intel_context_unpin(ce);
- return err;
+ eb->args->flags |= __EXEC_ENGINE_PINNED;
+ return rq;
}
static void eb_unpin_engine(struct i915_execbuffer *eb)
@@ -2397,6 +2687,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
struct intel_context *ce = eb->context;
struct intel_timeline *tl = ce->timeline;
+ if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+ return;
+
+ eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
mutex_lock(&tl->mutex);
intel_context_exit(ce);
mutex_unlock(&tl->mutex);
@@ -2405,11 +2700,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
}
static unsigned int
-eb_select_legacy_ring(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_select_legacy_ring(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
if (user_ring_id != I915_EXEC_BSD &&
@@ -2424,7 +2718,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
- bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+ bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2449,131 +2743,297 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
}
static int
-eb_pin_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce;
unsigned int idx;
int err;
if (i915_gem_context_user_engines(eb->gem_context))
- idx = args->flags & I915_EXEC_RING_MASK;
+ idx = eb->args->flags & I915_EXEC_RING_MASK;
else
- idx = eb_select_legacy_ring(eb, file, args);
+ idx = eb_select_legacy_ring(eb);
ce = i915_gem_context_get_engine(eb->gem_context, idx);
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = __eb_pin_engine(eb, ce);
- intel_context_put(ce);
+ intel_gt_pm_get(ce->engine->gt);
+
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ err = intel_context_alloc_state(ce);
+ if (err)
+ goto err;
+ }
+
+ /*
+ * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * EIO if the GPU is already wedged.
+ */
+ err = intel_gt_terminally_wedged(ce->engine->gt);
+ if (err)
+ goto err;
+
+ eb->context = ce;
+ eb->engine = ce->engine;
+ /*
+ * Make sure the engine pool stays alive even if we call intel_context_put
+ * during ww handling. The pool is destroyed when the last pm reference
+ * is dropped, which would break our -EDEADLK handling.
+ */
return err;
+
+err:
+ intel_gt_pm_put(ce->engine->gt);
+ intel_context_put(ce);
+ return err;
+}
+
+static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+ intel_gt_pm_put(eb->engine->gt);
+ intel_context_put(eb->context);
}
static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct eb_fence *fences, unsigned int n)
{
- while (n--)
- drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+ while (n--) {
+ drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+ dma_fence_put(fences[n].dma_fence);
+ kfree(fences[n].chain_fence);
+ }
kvfree(fences);
}
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
- struct drm_file *file)
+static int
+add_timeline_fence_array(struct i915_execbuffer *eb,
+ const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
{
- const unsigned long nfences = args->num_cliprects;
- struct drm_i915_gem_exec_fence __user *user;
- struct drm_syncobj **fences;
- unsigned long n;
- int err;
+ struct drm_i915_gem_exec_fence __user *user_fences;
+ u64 __user *user_values;
+ struct eb_fence *f;
+ u64 nfences;
+ int err = 0;
- if (!(args->flags & I915_EXEC_FENCE_ARRAY))
- return NULL;
+ nfences = timeline_fences->fence_count;
+ if (!nfences)
+ return 0;
/* Check multiplication overflow for access_ok() and kvmalloc_array() */
BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
if (nfences > min_t(unsigned long,
- ULONG_MAX / sizeof(*user),
- SIZE_MAX / sizeof(*fences)))
- return ERR_PTR(-EINVAL);
+ ULONG_MAX / sizeof(*user_fences),
+ SIZE_MAX / sizeof(*f)) - eb->num_fences)
+ return -EINVAL;
- user = u64_to_user_ptr(args->cliprects_ptr);
- if (!access_ok(user, nfences * sizeof(*user)))
- return ERR_PTR(-EFAULT);
+ user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+ if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
+ return -EFAULT;
- fences = kvmalloc_array(nfences, sizeof(*fences),
- __GFP_NOWARN | GFP_KERNEL);
- if (!fences)
- return ERR_PTR(-ENOMEM);
+ user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+ if (!access_ok(user_values, nfences * sizeof(*user_values)))
+ return -EFAULT;
+
+ f = krealloc(eb->fences,
+ (eb->num_fences + nfences) * sizeof(*f),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (!f)
+ return -ENOMEM;
- for (n = 0; n < nfences; n++) {
- struct drm_i915_gem_exec_fence fence;
+ eb->fences = f;
+ f += eb->num_fences;
+
+ BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+ ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+ while (nfences--) {
+ struct drm_i915_gem_exec_fence user_fence;
struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
+ u64 point;
- if (__copy_from_user(&fence, user++, sizeof(fence))) {
- err = -EFAULT;
- goto err;
+ if (__copy_from_user(&user_fence,
+ user_fences++,
+ sizeof(user_fence)))
+ return -EFAULT;
+
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
+ return -EINVAL;
+
+ if (__get_user(point, user_values++))
+ return -EFAULT;
+
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+ if (!syncobj) {
+ DRM_DEBUG("Invalid syncobj handle provided\n");
+ return -ENOENT;
}
- if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
- err = -EINVAL;
- goto err;
+ fence = drm_syncobj_fence_get(syncobj);
+
+ if (!fence && user_fence.flags &&
+ !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ return -EINVAL;
}
- syncobj = drm_syncobj_find(file, fence.handle);
+ if (fence)
+ err = dma_fence_chain_find_seqno(&fence, point);
+
+ if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+ dma_fence_put(fence);
+ drm_syncobj_put(syncobj);
+ return err;
+ }
+
+ /*
+ * A point might have been signaled already and
+ * garbage collected from the timeline. In this case
+ * just ignore the point and carry on.
+ */
+ if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ drm_syncobj_put(syncobj);
+ continue;
+ }
+
+ /*
+ * For timeline syncobjs we need to preallocate chains for
+ * later signaling.
+ */
+ if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+ /*
+ * Waiting and signaling the same point (when point !=
+ * 0) would break the timeline.
+ */
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+ dma_fence_put(fence);
+ drm_syncobj_put(syncobj);
+ return -EINVAL;
+ }
+
+ f->chain_fence =
+ kmalloc(sizeof(*f->chain_fence),
+ GFP_KERNEL);
+ if (!f->chain_fence) {
+ drm_syncobj_put(syncobj);
+ dma_fence_put(fence);
+ return -ENOMEM;
+ }
+ } else {
+ f->chain_fence = NULL;
+ }
+
+ f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ f->dma_fence = fence;
+ f->value = point;
+ f++;
+ eb->num_fences++;
+ }
+
+ return 0;
+}
+
+static int add_fence_array(struct i915_execbuffer *eb)
+{
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
+ struct drm_i915_gem_exec_fence __user *user;
+ unsigned long num_fences = args->num_cliprects;
+ struct eb_fence *f;
+
+ if (!(args->flags & I915_EXEC_FENCE_ARRAY))
+ return 0;
+
+ if (!num_fences)
+ return 0;
+
+ /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+ if (num_fences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user),
+ SIZE_MAX / sizeof(*f) - eb->num_fences))
+ return -EINVAL;
+
+ user = u64_to_user_ptr(args->cliprects_ptr);
+ if (!access_ok(user, num_fences * sizeof(*user)))
+ return -EFAULT;
+
+ f = krealloc(eb->fences,
+ (eb->num_fences + num_fences) * sizeof(*f),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (!f)
+ return -ENOMEM;
+
+ eb->fences = f;
+ f += eb->num_fences;
+ while (num_fences--) {
+ struct drm_i915_gem_exec_fence user_fence;
+ struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
+
+ if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
+ return -EFAULT;
+
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
+ return -EINVAL;
+
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
DRM_DEBUG("Invalid syncobj handle provided\n");
- err = -ENOENT;
- goto err;
+ return -ENOENT;
+ }
+
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ fence = drm_syncobj_fence_get(syncobj);
+ if (!fence) {
+ DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ return -EINVAL;
+ }
}
BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
- fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+ f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ f->dma_fence = fence;
+ f->value = 0;
+ f->chain_fence = NULL;
+ f++;
+ eb->num_fences++;
}
- return fences;
-
-err:
- __free_fence_array(fences, n);
- return ERR_PTR(err);
+ return 0;
}
-static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
- struct drm_syncobj **fences)
+static void put_fence_array(struct eb_fence *fences, int num_fences)
{
if (fences)
- __free_fence_array(fences, args->num_cliprects);
+ __free_fence_array(fences, num_fences);
}
static int
-await_fence_array(struct i915_execbuffer *eb,
- struct drm_syncobj **fences)
+await_fence_array(struct i915_execbuffer *eb)
{
- const unsigned int nfences = eb->args->num_cliprects;
unsigned int n;
int err;
- for (n = 0; n < nfences; n++) {
+ for (n = 0; n < eb->num_fences; n++) {
struct drm_syncobj *syncobj;
- struct dma_fence *fence;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
- if (!(flags & I915_EXEC_FENCE_WAIT))
- continue;
+ syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
- fence = drm_syncobj_fence_get(syncobj);
- if (!fence)
- return -EINVAL;
+ if (!eb->fences[n].dma_fence)
+ continue;
- err = i915_request_await_dma_fence(eb->request, fence);
- dma_fence_put(fence);
+ err = i915_request_await_dma_fence(eb->request,
+ eb->fences[n].dma_fence);
if (err < 0)
return err;
}
@@ -2581,26 +3041,47 @@ await_fence_array(struct i915_execbuffer *eb,
return 0;
}
-static void
-signal_fence_array(struct i915_execbuffer *eb,
- struct drm_syncobj **fences)
+static void signal_fence_array(const struct i915_execbuffer *eb)
{
- const unsigned int nfences = eb->args->num_cliprects;
struct dma_fence * const fence = &eb->request->fence;
unsigned int n;
- for (n = 0; n < nfences; n++) {
+ for (n = 0; n < eb->num_fences; n++) {
struct drm_syncobj *syncobj;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+ syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
if (!(flags & I915_EXEC_FENCE_SIGNAL))
continue;
- drm_syncobj_replace_fence(syncobj, fence);
+ if (eb->fences[n].chain_fence) {
+ drm_syncobj_add_point(syncobj,
+ eb->fences[n].chain_fence,
+ fence,
+ eb->fences[n].value);
+ /*
+ * The chain's ownership is transferred to the
+ * timeline.
+ */
+ eb->fences[n].chain_fence = NULL;
+ } else {
+ drm_syncobj_replace_fence(syncobj, fence);
+ }
}
}
+static int
+parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+ struct i915_execbuffer *eb = data;
+ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
+ if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
+ return -EFAULT;
+
+ return add_timeline_fence_array(eb, &timeline_fences);
+}
+
static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
{
struct i915_request *rq, *rn;
@@ -2642,12 +3123,37 @@ static void eb_request_add(struct i915_execbuffer *eb)
mutex_unlock(&tl->mutex);
}
+static const i915_user_extension_fn execbuf_extensions[] = {
+ [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+ struct i915_execbuffer *eb)
+{
+ if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+ return 0;
+
+ /*
+ * The execbuf2 extension mechanism reuses cliprects_ptr, so it cannot
+ * be combined with any other flag that also uses that field.
+ */
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return -EINVAL;
+
+ if (args->num_cliprects != 0)
+ return -EINVAL;
+
+ return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+ execbuf_extensions,
+ ARRAY_SIZE(execbuf_extensions),
+ eb);
+}
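
For context, a sketch of how userspace is expected to feed these timeline fences in: the extension is chained through cliprects_ptr once I915_EXEC_USE_EXTENSIONS is set, with num_cliprects forced to zero. Field names follow the uapi additions that accompany this series; execbuf is the usual drm_i915_gem_execbuffer2, and handles, points and nfences are placeholder variables:

/* Userspace-side sketch; treat the placeholder variables as assumptions. */
struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
	.base.name   = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
	.fence_count = nfences,
	.handles_ptr = (uintptr_t)handles,	/* struct drm_i915_gem_exec_fence[] */
	.values_ptr  = (uintptr_t)points,	/* matching u64 timeline points */
};

execbuf.flags |= I915_EXEC_USE_EXTENSIONS;	/* cliprects_ptr now carries extensions */
execbuf.num_cliprects = 0;			/* must be zero, see the check above */
execbuf.cliprects_ptr = (uintptr_t)&ext;	/* head of the i915_user_extension chain */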
+
static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args,
- struct drm_i915_gem_exec_object2 *exec,
- struct drm_syncobj **fences)
+ struct drm_i915_gem_exec_object2 *exec)
{
struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
@@ -2668,6 +3174,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
+ eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
+ eb.vma[0].vma = NULL;
+ eb.reloc_pool = eb.batch_pool = NULL;
+ eb.reloc_context = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2677,6 +3187,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_len = args->batch_len;
eb.trampoline = NULL;
+ eb.fences = NULL;
+ eb.num_fences = 0;
+
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
if (INTEL_GEN(i915) >= 11)
@@ -2694,14 +3207,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
+ err = parse_execbuf2_extensions(args, &eb);
+ if (err)
+ goto err_ext;
+
+ err = add_fence_array(&eb);
+ if (err)
+ goto err_ext;
+
#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
if (args->flags & IN_FENCES) {
if ((args->flags & IN_FENCES) == IN_FENCES)
return -EINVAL;
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!in_fence)
- return -EINVAL;
+ if (!in_fence) {
+ err = -EINVAL;
+ goto err_ext;
+ }
}
#undef IN_FENCES
@@ -2723,11 +3246,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
- err = eb_pin_engine(&eb, file, args);
+ err = eb_select_engine(&eb);
if (unlikely(err))
goto err_context;
- err = eb_relocate(&eb);
+ err = eb_lookup_vmas(&eb);
+ if (err) {
+ eb_release_vmas(&eb, true);
+ goto err_engine;
+ }
+
+ i915_gem_ww_ctx_init(&eb.ww, true);
+
+ err = eb_relocate_parse(&eb);
if (err) {
/*
* If the user expects the execobject.offset and
@@ -2740,54 +3271,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
- if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
- drm_dbg(&i915->drm,
- "Attempting to use self-modifying batch buffer\n");
- err = -EINVAL;
- goto err_vma;
- }
-
- if (range_overflows_t(u64,
- eb.batch_start_offset, eb.batch_len,
- eb.batch->vma->size)) {
- drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
- err = -EINVAL;
- goto err_vma;
- }
-
- if (eb.batch_len == 0)
- eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
-
- err = eb_parse(&eb);
- if (err)
- goto err_vma;
+ ww_acquire_done(&eb.ww.ctx);
- /*
- * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
- * batch" bit. Hence we need to pin secure batches into the global gtt.
- * hsw should have this fixed, but bdw mucks it up again. */
batch = eb.batch->vma;
- if (eb.batch_flags & I915_DISPATCH_SECURE) {
- struct i915_vma *vma;
-
- /*
- * So on first glance it looks freaky that we pin the batch here
- * outside of the reservation loop. But:
- * - The batch is already pinned into the relevant ppgtt, so we
- * already have the backing storage fully allocated.
- * - No other BO uses the global gtt (well contexts, but meh),
- * so we don't really have issues with multiple objects not
- * fitting due to fragmentation.
- * So this is actually safe.
- */
- vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_parse;
- }
-
- batch = vma;
- }
/* All GPU relocation batches must be submitted prior to the user rq */
GEM_BUG_ON(eb.reloc_cache.rq);
@@ -2796,7 +3282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
err = PTR_ERR(eb.request);
- goto err_batch_unpin;
+ goto err_vma;
}
if (in_fence) {
@@ -2811,8 +3297,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_request;
}
- if (fences) {
- err = await_fence_array(&eb, fences);
+ if (eb.fences) {
+ err = await_fence_array(&eb);
if (err)
goto err_request;
}
@@ -2833,18 +3319,17 @@ i915_gem_do_execbuffer(struct drm_device *dev,
* to explicitly hold another reference here.
*/
eb.request->batch = batch;
- if (batch->private)
- intel_gt_buffer_pool_mark_active(batch->private, eb.request);
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
err_request:
- add_to_client(eb.request, file);
i915_request_get(eb.request);
eb_request_add(&eb);
- if (fences)
- signal_fence_array(&eb, fences);
+ if (eb.fences)
+ signal_fence_array(&eb);
if (out_fence) {
if (err == 0) {
@@ -2858,16 +3343,21 @@ err_request:
}
i915_request_put(eb.request);
-err_batch_unpin:
- if (eb.batch_flags & I915_DISPATCH_SECURE)
- i915_vma_unpin(batch);
-err_parse:
- if (batch->private)
- intel_gt_buffer_pool_put(batch->private);
err_vma:
+ eb_release_vmas(&eb, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
- eb_unpin_engine(&eb);
+ WARN_ON(err == -EDEADLK);
+ i915_gem_ww_ctx_fini(&eb.ww);
+
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_put(eb.batch_pool);
+ if (eb.reloc_pool)
+ intel_gt_buffer_pool_put(eb.reloc_pool);
+ if (eb.reloc_context)
+ intel_context_put(eb.reloc_context);
+err_engine:
+ eb_put_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);
err_destroy:
@@ -2877,12 +3367,14 @@ err_out_fence:
put_unused_fd(out_fence_fd);
err_in_fence:
dma_fence_put(in_fence);
+err_ext:
+ put_fence_array(eb.fences, eb.num_fences);
return err;
}
static size_t eb_element_size(void)
{
- return sizeof(struct drm_i915_gem_exec_object2);
+ return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
}
static bool check_buffer_count(size_t count)
@@ -2938,7 +3430,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count, eb_element_size(),
+
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
drm_dbg(&i915->drm,
@@ -2971,7 +3465,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
exec2_list[i].flags = 0;
}
- err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+ err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
if (exec2.flags & __EXEC_HAS_RELOC) {
struct drm_i915_gem_exec_object __user *user_exec_list =
u64_to_user_ptr(args->buffers_ptr);
@@ -3003,7 +3497,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_execbuffer2 *args = data;
struct drm_i915_gem_exec_object2 *exec2_list;
- struct drm_syncobj **fences = NULL;
const size_t count = args->buffer_count;
int err;
@@ -3016,7 +3509,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
if (err)
return err;
- exec2_list = kvmalloc_array(count, eb_element_size(),
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
@@ -3031,15 +3525,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EFAULT;
}
- if (args->flags & I915_EXEC_FENCE_ARRAY) {
- fences = get_fence_array(args, file);
- if (IS_ERR(fences)) {
- kvfree(exec2_list);
- return PTR_ERR(fences);
- }
- }
-
- err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+ err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
/*
* Now that we have begun execution of the batchbuffer, we ignore
@@ -3080,7 +3566,6 @@ end:;
}
args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
- put_fence_array(args, fences);
kvfree(exec2_list);
return err;
}
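
The count + 2 allocation together with the larger eb_element_size() is what makes room for both the user exec objects and the driver-side eb_vma array in one buffer, plus two spare slots for the shadow and secure batch. A sketch of that layout arithmetic, mirroring the code above rather than adding anything new:

/*
 * Sketch of the combined allocation behind eb_element_size():
 * exec objects and eb_vma bookkeeping share one kvmalloc'ed buffer.
 */
static struct drm_i915_gem_exec_object2 *
alloc_exec_and_vma(unsigned int count, struct eb_vma **out_vma)
{
	struct drm_i915_gem_exec_object2 *exec2_list;
	struct eb_vma *vma;

	exec2_list = kvmalloc_array(count + 2,
				    sizeof(*exec2_list) + sizeof(*vma),
				    __GFP_NOWARN | GFP_KERNEL);
	if (!exec2_list)
		return NULL;

	/* the eb_vma array begins one exec slot past the user's count entries */
	vma = (struct eb_vma *)(exec2_list + count + 1);
	vma[0].vma = NULL;	/* matches eb.vma[0].vma = NULL above */

	*out_vma = vma;
	return exec2_list;
}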
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 753f82d87a31..3d69e51f3e4d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct intel_runtime_pm *rpm = &i915->runtime_pm;
struct i915_ggtt *ggtt = &i915->ggtt;
bool write = area->vm_flags & VM_WRITE;
+ struct i915_gem_ww_ctx ww;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
- /* Sanity check that we allow writing into this object */
- if (i915_gem_object_is_readonly(obj) && write)
- return VM_FAULT_SIGBUS;
-
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
trace_i915_gem_object_fault(obj, page_offset, true, write);
- ret = i915_gem_object_pin_pages(obj);
+ wakeref = intel_runtime_pm_get(rpm);
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ ret = i915_gem_object_lock(obj, &ww);
if (ret)
- goto err;
+ goto err_rpm;
- wakeref = intel_runtime_pm_get(rpm);
+ /* Sanity check that we allow writing into this object */
+ if (i915_gem_object_is_readonly(obj) && write) {
+ ret = -EFAULT;
+ goto err_rpm;
+ }
- ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+ ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_rpm;
+ ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+ if (ret)
+ goto err_pages;
+
/* Now pin it into the GTT as needed */
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
- if (IS_ERR(vma)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
@@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
* all hope that the hardware is able to track future writes.
*/
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
- if (IS_ERR(vma)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
flags = PIN_MAPPABLE;
view.type = I915_GGTT_VIEW_PARTIAL;
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
}
/* The entire mappable GGTT is pinned? Unexpected! */
@@ -389,10 +398,16 @@ err_unpin:
__i915_vma_unpin(vma);
err_reset:
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+err_pages:
+ i915_gem_object_unpin_pages(obj);
err_rpm:
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
intel_runtime_pm_put(rpm, wakeref);
- i915_gem_object_unpin_pages(obj);
-err:
return i915_error_to_vmf_fault(ret);
}
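
One detail worth calling out in the fault path above: -EDEADLK from a ww pin must not be swallowed by the partial-view fallback, or the backoff at err_rpm would never run. A sketch of that guard, with try_pin() as a hypothetical stand-in for i915_gem_object_ggtt_pin_ww():

/*
 * Sketch: keep -EDEADLK out of the fallback logic so it reaches the
 * caller's ww backoff. try_pin() is hypothetical.
 */
static struct i915_vma *pin_with_fallback(struct drm_i915_gem_object *obj,
					  struct i915_gem_ww_ctx *ww,
					  u64 full_flags, u64 partial_flags)
{
	struct i915_vma *vma;

	vma = try_pin(obj, ww, full_flags);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = try_pin(obj, ww, partial_flags);	/* never eats -EDEADLK */

	return vma;	/* -EDEADLK propagates to the retry loop's backoff */
}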
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 9cf4ad78ece6..d46db8d8f38e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
-static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ bool intr)
{
- dma_resv_lock(obj->base.resv, NULL);
+ int ret;
+
+ if (intr)
+ ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+ else
+ ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+
+ if (!ret && ww)
+ list_add_tail(&obj->obj_link, &ww->obj_list);
+ if (ret == -EALREADY)
+ ret = 0;
+
+ if (ret == -EDEADLK)
+ ww->contended = obj;
+
+ return ret;
}
-static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
{
- return dma_resv_trylock(obj->base.resv);
+ return __i915_gem_object_lock(obj, ww, ww && ww->intr);
}
-static inline int
-i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
{
- return dma_resv_lock_interruptible(obj->base.resv, NULL);
+ WARN_ON(ww && !ww->intr);
+ return __i915_gem_object_lock(obj, ww, true);
+}
+
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+ return dma_resv_trylock(obj->base.resv);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
@@ -412,7 +436,6 @@ static inline void
i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
{
i915_gem_object_unpin_pages(obj);
- i915_gem_object_unlock(obj);
}
static inline struct intel_engine_cs *
@@ -435,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
int __must_check
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
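
Two call shapes fall out of the new lock signature: plain callers pass a NULL context, while ww callers get -EALREADY folded to 0 so relocking an object already held in the same acquire context is harmless. A small sketch, assuming nothing beyond the helpers declared above:

/* Sketch only: both call shapes of the new lock API. */
static int lock_call_shapes(struct drm_i915_gem_object *obj,
			    struct i915_gem_ww_ctx *ww)
{
	int err;

	/* No ww context: behaves like the old unconditional lock/unlock. */
	err = i915_gem_object_lock(obj, NULL);
	if (err)
		return err;
	i915_gem_object_unlock(obj);

	/*
	 * Under a ww context: the lock is tracked on ww->obj_list and is
	 * dropped by i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini();
	 * locking the same object again returns 0 thanks to -EALREADY.
	 */
	err = i915_gem_object_lock(obj, ww);
	if (!err)
		err = i915_gem_object_lock(obj, ww);	/* harmless repeat */
	return err;
}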
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index bfdb32d46877..aee7ad3cc3c6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -14,6 +14,7 @@
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
struct i915_vma *vma,
+ struct i915_gem_ww_ctx *ww,
u32 value)
{
struct drm_i915_private *i915 = ce->vm->i915;
@@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
goto out_pm;
}
+ err = i915_gem_object_lock(pool->obj, ww);
+ if (err)
+ goto out_put;
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
+ }
+
+ err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_put;
+ goto out_unpin;
}
rem = vma->size;
@@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
intel_gt_chipset_flush(ce->vm->gt);
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
batch->private = pool;
return batch;
+out_unpin:
+ i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, 0);
- i915_vma_unlock(vma);
if (unlikely(err))
return err;
@@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
struct intel_context *ce,
u32 value)
{
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
struct i915_vma *vma;
@@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (unlikely(err))
- return err;
+ i915_gem_ww_ctx_init(&ww, true);
+ intel_engine_pm_get(ce->engine);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ goto out;
- batch = intel_emit_vma_fill_blt(ce, vma, value);
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto out_ctx;
+
+ batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto out_unpin;
+ goto out_vma;
}
- rq = intel_context_create_request(ce);
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (unlikely(err))
goto out_request;
- i915_vma_lock(vma);
err = move_obj_to_gpu(vma->obj, rq, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
if (unlikely(err))
goto out_request;
@@ -193,8 +208,18 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(ce, batch);
-out_unpin:
+out_vma:
i915_vma_unpin(vma);
+out_ctx:
+ intel_context_unpin(ce);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
return err;
}
@@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
}
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst)
{
@@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
goto out_pm;
}
+ err = i915_gem_object_lock(pool->obj, ww);
+ if (err)
+ goto out_put;
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
+ }
+
+ err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_put;
+ goto out_unpin;
}
rem = src->size;
@@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
i915_gem_object_unpin_map(pool->obj);
intel_gt_chipset_flush(ce->vm->gt);
-
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
batch->private = pool;
return batch;
+out_unpin:
+ i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct intel_context *ce)
{
- struct drm_gem_object *objs[] = { &src->base, &dst->base };
struct i915_address_space *vm = ce->vm;
struct i915_vma *vma[2], *batch;
- struct ww_acquire_ctx acquire;
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err, i;
@@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (IS_ERR(vma[0]))
return PTR_ERR(vma[0]);
- err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
- if (unlikely(err))
- return err;
-
vma[1] = i915_vma_instance(dst, vm, NULL);
if (IS_ERR(vma[1]))
- goto out_unpin_src;
+ return PTR_ERR(vma[1]);
- err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
+ i915_gem_ww_ctx_init(&ww, true);
+ intel_engine_pm_get(ce->engine);
+retry:
+ err = i915_gem_object_lock(src, &ww);
+ if (!err)
+ err = i915_gem_object_lock(dst, &ww);
+ if (!err)
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
+ if (err)
+ goto out_ctx;
+
+ err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
if (unlikely(err))
goto out_unpin_src;
- batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
+ batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_unpin_dst;
}
- rq = intel_context_create_request(ce);
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (unlikely(err))
goto out_request;
- err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
- if (unlikely(err))
- goto out_request;
-
for (i = 0; i < ARRAY_SIZE(vma); i++) {
err = move_obj_to_gpu(vma[i]->obj, rq, i);
if (unlikely(err))
- goto out_unlock;
+ goto out_request;
}
for (i = 0; i < ARRAY_SIZE(vma); i++) {
@@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
err = i915_vma_move_to_active(vma[i], rq, flags);
if (unlikely(err))
- goto out_unlock;
+ goto out_request;
}
if (rq->engine->emit_init_breadcrumb) {
err = rq->engine->emit_init_breadcrumb(rq);
if (unlikely(err))
- goto out_unlock;
+ goto out_request;
}
err = rq->engine->emit_bb_start(rq,
batch->node.start, batch->node.size,
0);
-out_unlock:
- drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+
out_request:
if (unlikely(err))
i915_request_set_error_once(rq, err);
@@ -400,6 +436,16 @@ out_unpin_dst:
i915_vma_unpin(vma[1]);
out_unpin_src:
i915_vma_unpin(vma[0]);
+out_ctx:
+ intel_context_unpin(ce);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
return err;
}
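
The copy path is where the ww conversion earns its keep: both buffers are locked under one acquire context, which is why the drm_gem_lock_reservations() pair could go away. A compact sketch of that two-object shape, with use_both() as a hypothetical stand-in for the request building above:

/*
 * Sketch: the same backoff loop as before, but taking two objects in one
 * acquire context so lock order no longer matters. use_both() is hypothetical.
 */
static int copy_example(struct drm_i915_gem_object *src,
			struct drm_i915_gem_object *dst)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(src, &ww);
	if (!err)
		err = i915_gem_object_lock(dst, &ww);	/* -EDEADLK here triggers backoff */
	if (!err)
		err = use_both(src, dst);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;	/* locks re-taken in a consistent order */
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}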
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 8bcd336a90dc..2409fdcccf0e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -13,12 +13,15 @@
#include "i915_vma.h"
struct drm_i915_gem_object;
+struct i915_gem_ww_ctx;
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
struct i915_vma *vma,
+ struct i915_gem_ww_ctx *ww,
u32 value);
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5335f799b548..b5c15557cc87 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -123,6 +123,15 @@ struct drm_i915_gem_object {
struct list_head lut_list;
spinlock_t lut_lock; /* guards lut_list */
+ /**
+ * @obj_link: Link into @i915_gem_ww_ctx.obj_list
+ *
+ * When we lock this object through i915_gem_object_lock() with a
+ * context, we add it to the list to ensure we can unlock everything
+ * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
+ */
+ struct list_head obj_link;
+
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
union {
@@ -282,6 +291,7 @@ struct drm_i915_gem_object {
} userptr;
unsigned long scratch;
+ u64 encode;
void *gvt_info;
};
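
As the new kerneldoc says, backoff and fini rely on walking this list to drop every lock taken so far. A hedged illustration of what such a walk could look like; the real i915_gem_ww_ctx helpers are outside this diff, so treat this as an assumption about their shape:

/*
 * Illustration only: an unlock-everything walk over ww->obj_list, using
 * the obj_link added above. Not the actual helper implementation.
 */
static void ww_unlock_all(struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next, &ww->obj_list, obj_link) {
		list_del(&obj->obj_link);
		i915_gem_object_unlock(obj);
	}
}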
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index e8a083743e09..d6eeefab3d01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -254,9 +254,35 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
return NULL;
+ if (GEM_WARN_ON(type == I915_MAP_WC &&
+ !static_cpu_has(X86_FEATURE_PAT)))
+ return NULL;
+
/* A single page can always be kmapped */
- if (n_pte == 1 && type == I915_MAP_WB)
- return kmap(sg_page(sgt->sgl));
+ if (n_pte == 1 && type == I915_MAP_WB) {
+ struct page *page = sg_page(sgt->sgl);
+
+ /*
+ * On 32b, highmem uses a finite set of indirect PTE (i.e.
+ * vmap) to provide virtual mappings of the high pages.
+ * As these are finite, map_new_virtual() must wait for some
+ * other kmap() to finish when it runs out. If we map a large
+ * number of objects, there is no method for it to tell us
+ * to release the mappings, and we deadlock.
+ *
+ * However, if we make an explicit vmap of the page, that
+ * uses a larger vmalloc arena, and also has the ability
+ * to tell us to release unwanted mappings. Most importantly,
+ * it will fail and propagate an error instead of waiting
+ * forever.
+ *
+ * So if the page is beyond the 32b boundary, make an explicit
+ * vmap. On 64b, this check will be optimised away as we can
+ * directly kmap any page on the system.
+ */
+ if (!PageHighMem(page))
+ return kmap(page);
+ }
mem = stack;
if (n_pte > ARRAY_SIZE(stack)) {
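
A condensed sketch of the mapping decision the comment above describes, assuming a single page: kmap for lowmem, an explicit one-page vmap for highmem so the mapping can fail instead of blocking:

/* Sketch of the single-page mapping choice described above. */
static void *map_one_page(struct page *page)
{
	if (!PageHighMem(page))
		return kmap(page);	/* lowmem: direct-mapped, cannot fail */

	/* highmem: a one-page vmap returns NULL on failure instead of blocking */
	return vmap(&page, 1, 0, PAGE_KERNEL);
}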
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3d215164dd5a..40d3e40500fa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
drm_WARN_ON(&i915->drm,
i915_gem_object_set_to_gtt_domain(obj, false));
i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index 540ef0551789..1929d6cf4150 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -9,6 +9,7 @@
#include <drm/drm_file.h>
#include "i915_drv.h"
+#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
@@ -35,9 +36,10 @@ int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_file_private *file_priv = file->driver_priv;
- unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
- struct i915_request *request, *target = NULL;
+ struct i915_gem_context *ctx;
+ unsigned long idx;
long ret;
/* ABI: return -EIO if already wedged */
@@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
- spin_lock(&file_priv->mm.lock);
- list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
- if (time_after_eq(request->emitted_jiffies, recent_enough))
- break;
+ rcu_read_lock();
+ xa_for_each(&file_priv->context_xa, idx, ctx) {
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
- if (target && xchg(&target->file_priv, NULL))
- list_del(&target->client_link);
+ if (!kref_get_unless_zero(&ctx->ref))
+ continue;
+ rcu_read_unlock();
- target = request;
- }
- if (target)
- i915_request_get(target);
- spin_unlock(&file_priv->mm.lock);
+ for_each_gem_engine(ce,
+ i915_gem_context_lock_engines(ctx),
+ it) {
+ struct i915_request *rq, *target = NULL;
+
+ if (!ce->timeline)
+ continue;
+
+ mutex_lock(&ce->timeline->mutex);
+ list_for_each_entry_reverse(rq,
+ &ce->timeline->requests,
+ link) {
+ if (i915_request_completed(rq))
+ break;
- if (!target)
- return 0;
+ if (time_after(rq->emitted_jiffies,
+ recent_enough))
+ continue;
- ret = i915_request_wait(target,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- i915_request_put(target);
+ target = i915_request_get(rq);
+ break;
+ }
+ mutex_unlock(&ce->timeline->mutex);
+ if (!target)
+ continue;
+
+ ret = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(target);
+ if (ret < 0)
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ i915_gem_context_put(ctx);
+
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
return ret < 0 ? ret : 0;
}
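
The throttle now walks a refcounted xarray under RCU, taking a reference before it sleeps and re-entering the read section for the next entry. The same pattern in isolation, with use() as a hypothetical blocking step:

/*
 * Sketch of the lookup pattern above: iterate a refcounted xarray under
 * RCU, pin each entry before dropping the read lock to sleep.
 */
static void for_each_ctx_sleepable(struct xarray *xa)
{
	struct i915_gem_context *ctx;
	unsigned long idx;

	rcu_read_lock();
	xa_for_each(xa, idx, ctx) {
		if (!kref_get_unless_zero(&ctx->ref))
			continue;	/* entry is being freed, skip it */
		rcu_read_unlock();

		use(ctx);		/* may block, e.g. i915_request_wait() */

		i915_gem_context_put(ctx);
		rcu_read_lock();
	}
	rcu_read_unlock();
}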
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index ff72ee2fd9cd..ffcaee74a249 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
* whilst executing a fenced command for an untiled object.
*/
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
if (i915_gem_object_is_framebuffer(obj)) {
i915_gem_object_unlock(obj);
return -EBUSY;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 8291ede6902c..1f35e71429b4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
*/
for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
- unsigned int combination = 0;
+ unsigned int combination = SZ_4K; /* Required for ppGTT */
for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
if (i & BIT(j))
@@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
{
int err;
- i915_gem_object_lock(vma->obj);
+ i915_gem_object_lock(vma->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
i915_gem_object_unlock(vma->obj);
if (err)
@@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
unsigned long n;
int err;
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
- return err;
+ goto err_unlock;
for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
@@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
}
i915_gem_object_finish_access(obj);
+err_unlock:
+ i915_gem_object_unlock(obj);
return err;
}
@@ -1614,7 +1617,7 @@ int i915_gem_huge_page_mock_selftests(void)
out_put:
i915_vm_put(&ppgtt->vm);
out_unlock:
- drm_dev_put(&dev_priv->drm);
+ mock_destroy_device(dev_priv);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 299c29e9ad86..4e36d4897ea6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
if (err)
goto err_unpin;
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 87d7d8aa080f..7049a6bbc03d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cpu;
int err;
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
if (err)
- return err;
+ goto out;
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
kunmap_atomic(map);
i915_gem_object_finish_access(ctx->obj);
- return 0;
+out:
+ i915_gem_object_unlock(ctx->obj);
+ return err;
}
static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
@@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
u32 *cpu;
int err;
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
if (err)
- return err;
+ goto out;
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
kunmap_atomic(map);
i915_gem_object_finish_access(ctx->obj);
- return 0;
+out:
+ i915_gem_object_unlock(ctx->obj);
+ return err;
}
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
@@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
u32 __iomem *map;
int err = 0;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
u32 __iomem *map;
int err = 0;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
u32 *map;
int err;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
u32 *map;
int err;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
i915_gem_object_unlock(ctx->obj);
if (err)
@@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cs;
int err;
- i915_gem_object_lock(ctx->obj);
+ i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
- i915_gem_object_unlock(ctx->obj);
if (err)
- return err;
+ goto out_unlock;
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_unlock;
+ }
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
- i915_vma_unpin(vma);
- return PTR_ERR(rq);
+ err = PTR_ERR(rq);
+ goto out_unpin;
}
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) {
- i915_request_add(rq);
- i915_vma_unpin(vma);
- return PTR_ERR(cs);
+ err = PTR_ERR(cs);
+ goto out_rq;
}
if (INTEL_GEN(ctx->engine->i915) >= 8) {
@@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
}
intel_ring_advance(rq, cs);
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
- i915_vma_unpin(vma);
+out_rq:
i915_request_add(rq);
+out_unpin:
+ i915_vma_unpin(vma);
+out_unlock:
+ i915_gem_object_unlock(ctx->obj);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7ffc3c751432..d3f87dc4eda3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
unsigned int n, m, need_flush;
int err;
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_write(obj, &need_flush);
if (err)
- return err;
+ goto out;
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
i915_gem_object_finish_access(obj);
obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
obj->write_domain = 0;
- return 0;
+out:
+ i915_gem_object_unlock(obj);
+ return err;
}
static noinline int cpu_check(struct drm_i915_gem_object *obj,
@@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
unsigned int n, m, needs_flush;
int err;
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
- return err;
+ goto out_unlock;
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@@ -527,6 +531,8 @@ out_unmap:
}
i915_gem_object_finish_access(obj);
+out_unlock:
+ i915_gem_object_unlock(obj);
return err;
}
@@ -887,24 +893,15 @@ out_file:
return err;
}
-static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
+static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
{
- struct drm_i915_gem_object *obj;
u32 *cmd;
- int err;
- if (INTEL_GEN(vma->vm->i915) < 8)
- return ERR_PTR(-EINVAL);
+ GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8);
- obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto err;
- }
+ cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
+ if (IS_ERR(cmd))
+ return PTR_ERR(cmd);
*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
@@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
*cmd++ = upper_32_bits(vma->node.start);
*cmd = MI_BATCH_BUFFER_END;
- __i915_gem_object_flush_map(obj, 0, 64);
- i915_gem_object_unpin_map(obj);
+ __i915_gem_object_flush_map(rpcs, 0, 64);
+ i915_gem_object_unpin_map(rpcs);
intel_gt_chipset_flush(vma->vm->gt);
- vma = i915_vma_instance(obj, vma->vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- goto err;
-
- return vma;
-
-err:
- i915_gem_object_put(obj);
- return ERR_PTR(err);
+ return 0;
}
static int
@@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
struct intel_context *ce,
struct i915_request **rq_out)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_request *rq;
+ struct i915_gem_ww_ctx ww;
struct i915_vma *batch;
struct i915_vma *vma;
+ struct drm_i915_gem_object *rpcs;
int err;
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+ if (INTEL_GEN(i915) < 8)
+ return -EINVAL;
+
vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
- i915_gem_object_lock(obj);
- err = i915_gem_object_set_to_gtt_domain(obj, false);
- i915_gem_object_unlock(obj);
- if (err)
- return err;
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- return err;
+ rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(rpcs))
+ return PTR_ERR(rpcs);
- batch = rpcs_query_batch(vma);
+ batch = i915_vma_instance(rpcs, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto err_vma;
+ goto err_put;
}
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_lock(rpcs, &ww);
+ if (!err)
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err_put;
+
+ err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err_vma;
+
+ err = rpcs_query_batch(rpcs, vma);
+ if (err)
+ goto err_batch;
+
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_batch;
}
- i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
if (err)
goto skip_request;
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
if (err)
goto skip_request;
@@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
if (err)
goto skip_request;
- i915_vma_unpin_and_release(&batch, 0);
- i915_vma_unpin(vma);
-
*rq_out = i915_request_get(rq);
- i915_request_add(rq);
-
- return 0;
-
skip_request:
- i915_request_set_error_once(rq, err);
+ if (err)
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
err_batch:
- i915_vma_unpin_and_release(&batch, 0);
+ i915_vma_unpin(batch);
err_vma:
i915_vma_unpin(vma);
-
+err_put:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ i915_gem_object_put(rpcs);
return err;
}
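
The emit_rpcs_query() conversion above is the template for the ww conversions in the rest of this series: every lock and pin is taken under one acquire context, and -EDEADLK restarts the whole sequence. A minimal sketch of just that retry loop, built only from calls that appear in the hunk (the object and vma names are placeholders):

static int pin_under_ww_sketch(struct drm_i915_gem_object *obj,
			       struct i915_vma *vma)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);	/* false: not an interruptible acquire */
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (!err) {
		/* ... build and submit the request against vma ... */
		i915_vma_unpin(vma);
	}
	if (err == -EDEADLK) {
		/* Lost a lock race; drop everything held by ww and restart. */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
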
@@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
i915_request_add(rq);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
@@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
if (!vm)
return -ENODEV;
- page = vm->scratch[0].base.page;
+ page = __px_page(vm->scratch[0]);
if (!page) {
pr_err("No scratch page!\n");
return -EINVAL;
@@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg)
return -ENOMEM;
counter = 0;
- err = context_barrier_task(ctx, 0,
- NULL, NULL, mock_barrier_task, &counter);
+ err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
+ mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg)
}
counter = 0;
- err = context_barrier_task(ctx, ALL_ENGINES,
- skip_unused_engines,
- NULL,
- mock_barrier_task,
- &counter);
+ err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
+ NULL, NULL, mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg)
counter = 0;
context_barrier_inject_fault = BIT(RCS0);
- err = context_barrier_task(ctx, ALL_ENGINES,
- NULL, NULL, mock_barrier_task, &counter);
+ err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
+ mock_barrier_task, &counter);
context_barrier_inject_fault = 0;
if (err == -ENXIO)
err = 0;
@@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg)
goto out;
counter = 0;
- err = context_barrier_task(ctx, ALL_ENGINES,
- skip_unused_engines,
- NULL,
- mock_barrier_task,
- &counter);
+ err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
+ NULL, NULL, mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@@ -2003,7 +1997,7 @@ int i915_gem_context_mock_selftests(void)
err = i915_subtests(tests, i915);
- drm_dev_put(&i915->drm);
+ mock_destroy_device(i915);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 2a52b92586b9..0845ce1ae37c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -272,7 +272,7 @@ int i915_gem_dmabuf_mock_selftests(void)
err = i915_subtests(tests, i915);
- drm_dev_put(&i915->drm);
+ mock_destroy_device(i915);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index a49016f8ee0d..e1d50a5a1477 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -32,46 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ err = i915_gem_object_lock(obj, &eb->ww);
+ if (err)
+ return err;
+
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
if (err)
return err;
/* 8-Byte aligned */
- if (!__reloc_entry_gpu(eb, vma,
- offsets[0] * sizeof(u32),
- 0)) {
- err = -EIO;
- goto unpin_vma;
- }
+ err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
+ if (err <= 0)
+ goto reloc_err;
/* !8-Byte aligned */
- if (!__reloc_entry_gpu(eb, vma,
- offsets[1] * sizeof(u32),
- 1)) {
- err = -EIO;
- goto unpin_vma;
- }
+ err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
+ if (err <= 0)
+ goto reloc_err;
/* Skip to the end of the cmd page */
- i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+ i = PAGE_SIZE / sizeof(u32) - 1;
i -= eb->reloc_cache.rq_size;
memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
MI_NOOP, i);
eb->reloc_cache.rq_size += i;
- /* Force batch chaining */
- if (!__reloc_entry_gpu(eb, vma,
- offsets[2] * sizeof(u32),
- 2)) {
- err = -EIO;
- goto unpin_vma;
- }
+ /* Force next batch */
+ err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
+ if (err <= 0)
+ goto reloc_err;
GEM_BUG_ON(!eb->reloc_cache.rq);
rq = i915_request_get(eb->reloc_cache.rq);
- err = reloc_gpu_flush(&eb->reloc_cache);
- if (err)
- goto put_rq;
+ reloc_gpu_flush(eb, &eb->reloc_cache);
GEM_BUG_ON(eb->reloc_cache.rq);
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
@@ -103,6 +96,11 @@ put_rq:
unpin_vma:
i915_vma_unpin(vma);
return err;
+
+reloc_err:
+ if (!err)
+ err = -EIO;
+ goto unpin_vma;
}
static int igt_gpu_reloc(void *arg)
@@ -124,6 +122,8 @@ static int igt_gpu_reloc(void *arg)
goto err_scratch;
}
+ intel_gt_pm_get(&eb.i915->gt);
+
for_each_uabi_engine(eb.engine, eb.i915) {
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);
@@ -134,15 +134,29 @@ static int igt_gpu_reloc(void *arg)
err = PTR_ERR(eb.context);
goto err_pm;
}
+ eb.reloc_pool = NULL;
+ eb.reloc_context = NULL;
- err = intel_context_pin(eb.context);
- if (err)
- goto err_put;
+ i915_gem_ww_ctx_init(&eb.ww, false);
+retry:
+ err = intel_context_pin_ww(eb.context, &eb.ww);
+ if (!err) {
+ err = __igt_gpu_reloc(&eb, scratch);
+
+ intel_context_unpin(eb.context);
+ }
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&eb.ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&eb.ww);
- err = __igt_gpu_reloc(&eb, scratch);
+ if (eb.reloc_pool)
+ intel_gt_buffer_pool_put(eb.reloc_pool);
+ if (eb.reloc_context)
+ intel_context_put(eb.reloc_context);
- intel_context_unpin(eb.context);
-err_put:
intel_context_put(eb.context);
err_pm:
intel_engine_pm_put(eb.engine);
@@ -153,6 +167,7 @@ err_pm:
if (igt_flush_test(eb.i915))
err = -EIO;
+ intel_gt_pm_put(&eb.i915->gt);
err_scratch:
i915_gem_object_put(scratch);
return err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 9c7402ce5bf9..d27d87a678c8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
int err;
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
- return err;
+ goto err;
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
- i915_vma_unpin(vma);
- return PTR_ERR(rq);
+ err = PTR_ERR(rq);
+ goto err_unpin;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq,
EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
i915_request_add(rq);
+err_unpin:
i915_vma_unpin(vma);
+err:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
if (err)
return err;
}
@@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
if (IS_ERR(vma)) {
@@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unmap;
}
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
- goto out_unmap;
+ goto out_ww;
rq = i915_request_create(engine->kernel_context);
if (IS_ERR(rq)) {
@@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unpin;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, 0);
- i915_vma_unlock(vma);
err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
i915_request_get(rq);
@@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
out_unpin:
i915_vma_unpin(vma);
+out_ww:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
if (err)
goto out_unmap;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index faa5b6d91795..bf853c40ec65 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -85,7 +85,7 @@ int i915_gem_object_mock_selftests(void)
err = i915_subtests(tests, i915);
- drm_dev_put(&i915->drm);
+ mock_destroy_device(i915);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 34932871b3a5..8cee68c6a6dc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
}
/* Make the object dirty so that put_pages must do copy back the data */
- i915_gem_object_lock(obj);
+ i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@@ -73,6 +73,6 @@ int i915_gem_phys_mock_selftests(void)
err = i915_subtests(tests, i915);
- drm_dev_put(&i915->drm);
+ mock_destroy_device(i915);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index debaf7b18ab5..be30b27e2926 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -28,10 +28,9 @@ static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment,
sg = sg_next(sg);
}
- if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
- err = -ENOMEM;
+ err = dma_map_sgtable(attachment->dev, st, dir, 0);
+ if (err)
goto err_st;
- }
return st;
@@ -46,7 +45,7 @@ static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment,
struct sg_table *st,
enum dma_data_direction dir)
{
- dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir);
+ dma_unmap_sgtable(attachment->dev, st, dir, 0);
sg_free_table(st);
kfree(st);
}
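
The mock_dmabuf change above rests on a calling-convention difference worth spelling out: dma_map_sg() reports failure by returning 0, whereas dma_map_sgtable() returns a negative errno and records the mapped entry count inside the sg_table itself. A minimal sketch of the new pairing, with placeholder names and the actual DMA work elided (assumes <linux/dma-mapping.h>):

static int map_sgtable_sketch(struct device *dev, struct sg_table *st,
			      enum dma_data_direction dir)
{
	int err;

	err = dma_map_sgtable(dev, st, dir, 0);	/* 0: no DMA_ATTR_* flags */
	if (err)
		return err;	/* already a negative errno */

	/* ... perform the DMA transfer ... */

	dma_unmap_sgtable(dev, st, dir, 0);
	return 0;
}
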