From 35c6367f516090a3086d37e7023b08608d555aba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Jun 2021 08:08:38 +0200 Subject: drm/i915/selftests: Reorder tasklet_disable vs local_bh_disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to a change in requirements that disallows tasklet_disable() being called from atomic context, rearrange the selftest to avoid doing so. <3> [324.942939] BUG: sleeping function called from invalid context at kernel/softirq.c:888 <3> [324.942952] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 5601, name: i915_selftest <4> [324.942960] 1 lock held by i915_selftest/5601: <4> [324.942963] #0: ffff888101d19240 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x50 <3> [324.942987] Preemption disabled at: <3> [324.942990] [] live_hold_reset.part.65+0xc2/0x2f0 [i915] <4> [324.943255] CPU: 0 PID: 5601 Comm: i915_selftest Tainted: G U 5.13.0-rc5-CI-CI_DRM_10197+ #1 <4> [324.943259] Hardware name: Intel Corp. Geminilake/GLK RVP2 LP4SD (07), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 <4> [324.943263] Call Trace: <4> [324.943267] dump_stack+0x7f/0xad <4> [324.943276] ___might_sleep.cold.123+0xf2/0x106 <4> [324.943286] tasklet_unlock_wait+0x2e/0xb0 <4> [324.943291] ? ktime_get_raw+0x81/0x120 <4> [324.943305] live_hold_reset.part.65+0x1ab/0x2f0 [i915] <4> [324.943500] __i915_subtests.cold.7+0x42/0x92 [i915] <4> [324.943723] ? __i915_live_teardown+0x50/0x50 [i915] <4> [324.943922] ? __intel_gt_live_setup+0x30/0x30 [i915] Fixes: da044747401fc ("tasklets: Replace spin wait in tasklet_unlock_wait()") Signed-off-by: Chris Wilson Reviewed-by: Thomas Hellström Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210611060838.647973-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_execlists.c | 55 ++++++++++++++++------------ 1 file changed, 32 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index ea2203af0764..1c8108d30b85 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -551,6 +551,32 @@ static int live_pin_rewind(void *arg) return err; } +static int engine_lock_reset_tasklet(struct intel_engine_cs *engine) +{ + tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &engine->gt->reset.flags)) { + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); + + intel_gt_set_wedged(engine->gt); + return -EBUSY; + } + + return 0; +} + +static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine) +{ + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + &engine->gt->reset.flags); + + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); +} + static int live_hold_reset(void *arg) { struct intel_gt *gt = arg; @@ -598,15 +624,9 @@ static int live_hold_reset(void *arg) /* We have our request executing, now remove it and reset */ - local_bh_disable(); - if (test_and_set_bit(I915_RESET_ENGINE + id, - >->reset.flags)) { - local_bh_enable(); - intel_gt_set_wedged(gt); - err = -EBUSY; + err = engine_lock_reset_tasklet(engine); + if (err) goto out; - } - tasklet_disable(&engine->execlists.tasklet); engine->execlists.tasklet.callback(&engine->execlists.tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); @@ -618,10 +638,7 @@ static int live_hold_reset(void *arg) 
__intel_engine_reset_bh(engine, NULL); GEM_BUG_ON(rq->fence.error != -EIO); - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(I915_RESET_ENGINE + id, - >->reset.flags); - local_bh_enable(); + engine_unlock_reset_tasklet(engine); /* Check that we do not resubmit the held request */ if (!i915_request_wait(rq, 0, HZ / 5)) { @@ -4585,15 +4602,9 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(engine == ve->engine); /* Take ownership of the reset and tasklet */ - local_bh_disable(); - if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags)) { - local_bh_enable(); - intel_gt_set_wedged(gt); - err = -EBUSY; + err = engine_lock_reset_tasklet(engine); + if (err) goto out_heartbeat; - } - tasklet_disable(&engine->execlists.tasklet); engine->execlists.tasklet.callback(&engine->execlists.tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); @@ -4612,9 +4623,7 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(rq->fence.error != -EIO); /* Release our grasp on the engine, letting CS flow again */ - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); - local_bh_enable(); + engine_unlock_reset_tasklet(engine); /* Check that we do not resubmit the held request */ i915_request_get(rq); -- cgit v1.2.3 From b4b9731b02c3ce859d85dec17ec63f4bf3140bf4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 10 Jun 2021 16:35:25 +0200 Subject: drm/i915: Simplify userptr locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use an rwlock instead of spinlock for the global notifier lock to reduce risk of contention in execbuf. Protect object state with the object lock whenever possible rather than with the global notifier lock Don't take an explicit page_ref in userptr_submit_init() but rather call get_pages() after obtaining the page list so that get_pages() holds the page_ref. This means we don't need to call userptr_submit_fini(), which is needed to avoid awkward locking in our upcoming VM_BIND code. 
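In short, the notifier invalidation callback becomes the only writer of the notifier sequence, execbuf becomes a read-side user, and the userptr page list plus page_ref move under the object (dma-resv) lock. A rough sketch of the resulting scheme, pieced together from the hunks below rather than literal driver code (all function and field names are taken from the diff):

	/* mmu notifier invalidation: sole writer of the notifier seq */
	write_lock(&i915->mm.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);
	write_unlock(&i915->mm.notifier_lock);

	/* execbuf submission check: readers may now run concurrently */
	read_lock(&eb->i915->mm.notifier_lock);
	/* ... per-object mmu_interval_read_retry() checks ... */
	read_unlock(&eb->i915->mm.notifier_lock);

	/* page list and page_ref are protected by the object lock */
	assert_object_held_shared(obj);
	obj->userptr.page_ref++;
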
Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210610143525.624677-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 21 +++----- drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 72 ++++++++------------------ drivers/gpu/drm/i915/i915_drv.h | 2 +- 4 files changed, 31 insertions(+), 66 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index a8abc9af5ff4..201fed19d120 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -994,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr) +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -1008,11 +1008,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release eb_unreserve_vma(ev); - if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) { - ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT; - i915_gem_object_userptr_submit_fini(vma->obj); - } - if (final) i915_vma_put(vma); } @@ -1990,7 +1985,7 @@ repeat: } /* We may process another execbuffer during the unlock... */ - eb_release_vmas(eb, false, true); + eb_release_vmas(eb, false); i915_gem_ww_ctx_fini(&eb->ww); if (rq) { @@ -2094,7 +2089,7 @@ repeat_validate: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto repeat_validate; @@ -2191,7 +2186,7 @@ retry: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto retry; @@ -2268,7 +2263,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) #ifdef CONFIG_MMU_NOTIFIER if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { - spin_lock(&eb->i915->mm.notifier_lock); + read_lock(&eb->i915->mm.notifier_lock); /* * count is always at least 1, otherwise __EXEC_USERPTR_USED @@ -2286,7 +2281,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) break; } - spin_unlock(&eb->i915->mm.notifier_lock); + read_unlock(&eb->i915->mm.notifier_lock); } #endif @@ -3435,7 +3430,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_lookup_vmas(&eb); if (err) { - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); goto err_engine; } @@ -3528,7 +3523,7 @@ err_request: i915_request_put(eb.request); err_vma: - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); WARN_ON(err == -EDEADLK); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index e9eecebf5c9d..d66aa00d023a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -609,14 +609,12 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj); -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj); #else static inline bool 
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; } static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } -static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); } static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 7487bab11f0b..4b0acc7eaa27 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -67,11 +67,11 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, if (!mmu_notifier_range_blockable(range)) return false; - spin_lock(&i915->mm.notifier_lock); + write_lock(&i915->mm.notifier_lock); mmu_interval_set_seq(mni, cur_seq); - spin_unlock(&i915->mm.notifier_lock); + write_unlock(&i915->mm.notifier_lock); /* * We don't wait when the process is exiting. This is valid @@ -107,16 +107,15 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj) static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct page **pvec = NULL; - spin_lock(&i915->mm.notifier_lock); + assert_object_held_shared(obj); + if (!--obj->userptr.page_ref) { pvec = obj->userptr.pvec; obj->userptr.pvec = NULL; } GEM_BUG_ON(obj->userptr.page_ref < 0); - spin_unlock(&i915->mm.notifier_lock); if (pvec) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; @@ -128,7 +127,6 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; @@ -141,16 +139,13 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (!st) return -ENOMEM; - spin_lock(&i915->mm.notifier_lock); - if (GEM_WARN_ON(!obj->userptr.page_ref)) { - spin_unlock(&i915->mm.notifier_lock); - ret = -EFAULT; + if (!obj->userptr.page_ref) { + ret = -EAGAIN; goto err_free; } obj->userptr.page_ref++; pvec = obj->userptr.pvec; - spin_unlock(&i915->mm.notifier_lock); alloc_table: sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, @@ -241,7 +236,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, i915_gem_object_userptr_drop_ref(obj); } -static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages) +static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -259,15 +254,11 @@ static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool if (!IS_ERR_OR_NULL(pages)) i915_gem_userptr_put_pages(obj, pages); - if (get_pages) - err = ____i915_gem_object_get_pages(obj); - return err; } int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; struct page **pvec; unsigned int gup_flags = 0; @@ -277,39 +268,22 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if 
(obj->userptr.notifier.mm != current->mm) return -EFAULT; + notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; - /* optimistically try to preserve current pages while unlocked */ - if (i915_gem_object_has_pages(obj) && - !mmu_interval_check_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - spin_lock(&i915->mm.notifier_lock); - if (obj->userptr.pvec && - !mmu_interval_read_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - obj->userptr.page_ref++; - - /* We can keep using the current binding, this is the fastpath */ - ret = 1; - } - spin_unlock(&i915->mm.notifier_lock); + if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) { + i915_gem_object_unlock(obj); + return 0; } - if (!ret) { - /* Make sure userptr is unbound for next attempt, so we don't use stale pages. */ - ret = i915_gem_object_userptr_unbind(obj, false); - } + ret = i915_gem_object_userptr_unbind(obj); i915_gem_object_unlock(obj); - if (ret < 0) + if (ret) return ret; - if (ret > 0) - return 0; - - notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); - pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); if (!pvec) return -ENOMEM; @@ -329,7 +303,9 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) } ret = 0; - spin_lock(&i915->mm.notifier_lock); + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; if (mmu_interval_read_retry(&obj->userptr.notifier, !obj->userptr.page_ref ? notifier_seq : @@ -341,12 +317,14 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (!obj->userptr.page_ref++) { obj->userptr.pvec = pvec; obj->userptr.notifier_seq = notifier_seq; - pvec = NULL; + ret = ____i915_gem_object_get_pages(obj); } + obj->userptr.page_ref--; + out_unlock: - spin_unlock(&i915->mm.notifier_lock); + i915_gem_object_unlock(obj); out: if (pvec) { @@ -369,11 +347,6 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) return 0; } -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) -{ - i915_gem_object_userptr_drop_ref(obj); -} - int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { int err; @@ -396,7 +369,6 @@ int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) i915_gem_object_unlock(obj); } - i915_gem_object_userptr_submit_fini(obj); return err; } @@ -572,7 +544,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { #ifdef CONFIG_MMU_NOTIFIER - spin_lock_init(&dev_priv->mm.notifier_lock); + rwlock_init(&dev_priv->mm.notifier_lock); #endif return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38ff2fb89744..01e11fe38642 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -552,7 +552,7 @@ struct i915_gem_mm { * notifier_lock for mmu notifiers, memory may not be allocated * while holding this lock. */ - spinlock_t notifier_lock; + rwlock_t notifier_lock; #endif /* shrinker accounting, also useful for userland debugging */ -- cgit v1.2.3 From 8c209f42cb3a209c366bae2956c98d8ed0514773 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Tue, 8 Jun 2021 10:47:21 -0700 Subject: drm/i915/adl_p: Add initial ADL_P Workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the context WA are already implemented. Adding adl_p platform tag to reflect so. 
v2: adjust comments for clarity (MattR) BSpec: 54369 Cc: Matt Roper Cc: Aditya Swarup Reviewed-by: Matt Roper Signed-off-by: Radhakrishna Sripada Signed-off-by: Anusha Srivatsa Signed-off-by: Madhumitha Tolakanahalli Pradeep Signed-off-by: José Roberto de Souza Signed-off-by: Swathi Dhanavanthri Signed-off-by: Clint Taylor Link: https://patchwork.freedesktop.org/patch/msgid/20210608174721.17593-1-clinton.a.taylor@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 4 +-- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 39 ++++++++++++++------------- drivers/gpu/drm/i915/intel_pm.c | 8 +++--- 4 files changed, 29 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 71ac57670043..882bfd499e55 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2667,7 +2667,7 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) } /* - * Display WA #22010492432: ehl, tgl + * Display WA #22010492432: ehl, tgl, adl-p * Program half of the nominal DCO divider fraction value. */ static bool @@ -2675,7 +2675,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && IS_JSL_EHL_REVID(i915, EHL_REVID_B0, REVID_FOREVER)) || - IS_TIGERLAKE(i915)) && + IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) && i915->dpll.ref_clks.nssc == 38400; } diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 94e0a5669f90..87b06572fd2e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -208,7 +208,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* Wa_1409600907:tgl */ + /* Wa_1409600907:tgl,adl-p */ flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b62d1e31a645..977a76e648e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -640,15 +640,16 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, gen12_ctx_gt_tuning_init(engine, wal); /* - * Wa_1409142259:tgl - * Wa_1409347922:tgl - * Wa_1409252684:tgl - * Wa_1409217633:tgl - * Wa_1409207793:tgl - * Wa_1409178076:tgl - * Wa_1408979724:tgl - * Wa_14010443199:rkl - * Wa_14010698770:rkl + * Wa_1409142259:tgl,dg1,adl-p + * Wa_1409347922:tgl,dg1,adl-p + * Wa_1409252684:tgl,dg1,adl-p + * Wa_1409217633:tgl,dg1,adl-p + * Wa_1409207793:tgl,dg1,adl-p + * Wa_1409178076:tgl,dg1,adl-p + * Wa_1408979724:tgl,dg1,adl-p + * Wa_14010443199:tgl,rkl,dg1,adl-p + * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p + * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -1113,7 +1114,7 @@ gen12_gt_workarounds_init(struct drm_i915_private *i915, { wa_init_mcr(i915, wal); - /* Wa_14011060649:tgl,rkl,dg1,adls */ + /* Wa_14011060649:tgl,rkl,dg1,adls,adl-p */ wa_14011060649(i915, wal); } @@ -1633,38 +1634,40 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); } - if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) 
|| + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1606931601:tgl,rkl,dg1,adl-s */ + /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* * Wa_18011464164:tgl[B0+],dg1[B0+] * Wa_22010931296:tgl[B0+],dg1[B0+] - * Wa_14010919138:rkl,dg1,adl-s + * Wa_14010919138:rkl,dg1,adl-s,adl-p */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); /* - * Wa_1606700617:tgl,dg1 - * Wa_22010271021:tgl,rkl,dg1, adl-s + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p */ wa_masked_en(wal, GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */ + /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* * Wa_1409085225:tgl - * Wa_14010229206:tgl,rkl,dg1[a0],adl-s + * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 45fefa0ed160..ef8d9b2284b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7352,15 +7352,17 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) { /* Wa_1409120013:tgl,rkl,adl_s,dg1 */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, - ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); + if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || + IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) + intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, + ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); /* Wa_1409825376:tgl (pre-prod)*/ if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); - /* Wa_14011059788:tgl,rkl,adl_s,dg1 */ + /* Wa_14011059788:tgl,rkl,adl_s,dg1,adl-p */ intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, 0, DFR_DISABLE); -- cgit v1.2.3 From c865204e84a1a5c35e055b45971524efe4616e31 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 15 Jun 2021 14:24:08 +0200 Subject: drm/i915/ttm: Fix memory leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two memory leaks introduced with the ttm backend. 
Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210615122408.32347-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index bf33724bed5c..cdf36d3cf02a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -119,6 +119,7 @@ static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); ttm_tt_destroy_common(bdev, ttm); + ttm_tt_fini(ttm); kfree(i915_tt); } @@ -214,6 +215,7 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) if (likely(obj)) { /* This releases all gem object bindings to the backend. */ + i915_ttm_free_cached_io_st(obj); __i915_gem_free_object(obj); } } -- cgit v1.2.3 From 88be9a0a06b73ecd85a688a7c174c941e9692e92 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:24:55 +0100 Subject: drm/i915/ttm: add ttm_buddy_man MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add back our standalone i915_buddy allocator and integrate it into a ttm_resource_manager. This will plug into our ttm backend for managing device local-memory in the next couple of patches. v2(Thomas): - Return -ENOSPC from the buddy; ttm expects this in order to trigger eviction - Drop the unnecessary inline - bo->page_alignment is in page units Signed-off-by: Matthew Auld Cc: Thomas Hellström Acked-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/Makefile | 2 + drivers/gpu/drm/i915/i915_buddy.c | 412 ++++++++++++++ drivers/gpu/drm/i915/i915_buddy.h | 133 +++++ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 248 ++++++++ drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 56 ++ drivers/gpu/drm/i915/selftests/i915_buddy.c | 789 ++++++++++++++++++++++++++ 6 files changed, 1640 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_buddy.c create mode 100644 drivers/gpu/drm/i915/i915_buddy.h create mode 100644 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c create mode 100644 drivers/gpu/drm/i915/i915_ttm_buddy_manager.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_buddy.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f57dfc74d6ce..6c84e96ab26a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -162,6 +162,7 @@ gem-y += \ i915-y += \ $(gem-y) \ i915_active.o \ + i915_buddy.o \ i915_cmd_parser.o \ i915_gem_evict.o \ i915_gem_gtt.o \ @@ -171,6 +172,7 @@ i915-y += \ i915_request.o \ i915_scheduler.o \ i915_trace_points.o \ + i915_ttm_buddy_manager.o \ i915_vma.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c new file mode 100644 index 000000000000..29dd7d0310c1 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "i915_buddy.h" + +#include "i915_gem.h" +#include "i915_utils.h" + +static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, + struct i915_buddy_block *parent, + 
unsigned int order, + u64 offset) +{ + struct i915_buddy_block *block; + + GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); + + block = kmem_cache_zalloc(mm->slab_blocks, GFP_KERNEL); + if (!block) + return NULL; + + block->header = offset; + block->header |= order; + block->parent = parent; + + GEM_BUG_ON(block->header & I915_BUDDY_HEADER_UNUSED); + return block; +} + +static void i915_block_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + kmem_cache_free(mm->slab_blocks, block); +} + +static void mark_allocated(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_ALLOCATED; + + list_del(&block->link); +} + +static void mark_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_FREE; + + list_add(&block->link, + &mm->free_list[i915_buddy_block_order(block)]); +} + +static void mark_split(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_SPLIT; + + list_del(&block->link); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) +{ + unsigned int i; + u64 offset; + + if (size < chunk_size) + return -EINVAL; + + if (chunk_size < PAGE_SIZE) + return -EINVAL; + + if (!is_power_of_2(chunk_size)) + return -EINVAL; + + size = round_down(size, chunk_size); + + mm->size = size; + mm->chunk_size = chunk_size; + mm->max_order = ilog2(size) - ilog2(chunk_size); + + GEM_BUG_ON(mm->max_order > I915_BUDDY_MAX_ORDER); + + mm->slab_blocks = KMEM_CACHE(i915_buddy_block, SLAB_HWCACHE_ALIGN); + if (!mm->slab_blocks) + return -ENOMEM; + + mm->free_list = kmalloc_array(mm->max_order + 1, + sizeof(struct list_head), + GFP_KERNEL); + if (!mm->free_list) + goto out_destroy_slab; + + for (i = 0; i <= mm->max_order; ++i) + INIT_LIST_HEAD(&mm->free_list[i]); + + mm->n_roots = hweight64(size); + + mm->roots = kmalloc_array(mm->n_roots, + sizeof(struct i915_buddy_block *), + GFP_KERNEL); + if (!mm->roots) + goto out_free_list; + + offset = 0; + i = 0; + + /* + * Split into power-of-two blocks, in case we are given a size that is + * not itself a power-of-two. 
+ */ + do { + struct i915_buddy_block *root; + unsigned int order; + u64 root_size; + + root_size = rounddown_pow_of_two(size); + order = ilog2(root_size) - ilog2(chunk_size); + + root = i915_block_alloc(mm, NULL, order, offset); + if (!root) + goto out_free_roots; + + mark_free(mm, root); + + GEM_BUG_ON(i > mm->max_order); + GEM_BUG_ON(i915_buddy_block_size(mm, root) < chunk_size); + + mm->roots[i] = root; + + offset += root_size; + size -= root_size; + i++; + } while (size); + + return 0; + +out_free_roots: + while (i--) + i915_block_free(mm, mm->roots[i]); + kfree(mm->roots); +out_free_list: + kfree(mm->free_list); +out_destroy_slab: + kmem_cache_destroy(mm->slab_blocks); + return -ENOMEM; +} + +void i915_buddy_fini(struct i915_buddy_mm *mm) +{ + int i; + + for (i = 0; i < mm->n_roots; ++i) { + GEM_WARN_ON(!i915_buddy_block_is_free(mm->roots[i])); + i915_block_free(mm, mm->roots[i]); + } + + kfree(mm->roots); + kfree(mm->free_list); + kmem_cache_destroy(mm->slab_blocks); +} + +static int split_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + unsigned int block_order = i915_buddy_block_order(block) - 1; + u64 offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + GEM_BUG_ON(!i915_buddy_block_order(block)); + + block->left = i915_block_alloc(mm, block, block_order, offset); + if (!block->left) + return -ENOMEM; + + block->right = i915_block_alloc(mm, block, block_order, + offset + (mm->chunk_size << block_order)); + if (!block->right) { + i915_block_free(mm, block->left); + return -ENOMEM; + } + + mark_free(mm, block->left); + mark_free(mm, block->right); + + mark_split(block); + + return 0; +} + +static struct i915_buddy_block * +get_buddy(struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + parent = block->parent; + if (!parent) + return NULL; + + if (parent->left == block) + return parent->right; + + return parent->left; +} + +static void __i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + while ((parent = block->parent)) { + struct i915_buddy_block *buddy; + + buddy = get_buddy(block); + + if (!i915_buddy_block_is_free(buddy)) + break; + + list_del(&buddy->link); + + i915_block_free(mm, block); + i915_block_free(mm, buddy); + + block = parent; + } + + mark_free(mm, block); +} + +void i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + __i915_buddy_free(mm, block); +} + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects) +{ + struct i915_buddy_block *block, *on; + + list_for_each_entry_safe(block, on, objects, link) { + i915_buddy_free(mm, block); + cond_resched(); + } + INIT_LIST_HEAD(objects); +} + +/* + * Allocate power-of-two block. The order value here translates to: + * + * 0 = 2^0 * mm->chunk_size + * 1 = 2^1 * mm->chunk_size + * 2 = 2^2 * mm->chunk_size + * ... 
+ */ +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) +{ + struct i915_buddy_block *block = NULL; + unsigned int i; + int err; + + for (i = order; i <= mm->max_order; ++i) { + block = list_first_entry_or_null(&mm->free_list[i], + struct i915_buddy_block, + link); + if (block) + break; + } + + if (!block) + return ERR_PTR(-ENOSPC); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + + while (i != order) { + err = split_block(mm, block); + if (unlikely(err)) + goto out_free; + + /* Go low */ + block = block->left; + i--; + } + + mark_allocated(block); + kmemleak_update_trace(block); + return block; + +out_free: + if (i != order) + __i915_buddy_free(mm, block); + return ERR_PTR(err); +} + +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +/* + * Allocate range. Note that it's safe to chain together multiple alloc_ranges + * with the same blocks list. + * + * Intended for pre-allocating portions of the address space, for example to + * reserve a block for the initial framebuffer or similar, hence the expectation + * here is that i915_buddy_alloc() is still the main vehicle for + * allocations, so if that's not the case then the drm_mm range allocator is + * probably a much better fit, and so you should probably go use that instead. + */ +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *buddy; + LIST_HEAD(allocated); + LIST_HEAD(dfs); + u64 end; + int err; + int i; + + if (size < mm->chunk_size) + return -EINVAL; + + if (!IS_ALIGNED(size | start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(start, size, mm->size)) + return -EINVAL; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + end = start + size - 1; + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(&dfs, + struct i915_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + block_start = i915_buddy_block_offset(block); + block_end = block_start + i915_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (i915_buddy_block_is_allocated(block)) { + err = -ENOSPC; + goto err_free; + } + + if (contains(start, end, block_start, block_end)) { + if (!i915_buddy_block_is_free(block)) { + err = -ENOSPC; + goto err_free; + } + + mark_allocated(block); + list_add_tail(&block->link, &allocated); + continue; + } + + if (!i915_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, &dfs); + list_add(&block->left->tmp_link, &dfs); + } while (1); + + list_splice_tail(&allocated, blocks); + return 0; + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. 
+ */ + buddy = get_buddy(block); + if (buddy && + (i915_buddy_block_is_free(block) && + i915_buddy_block_is_free(buddy))) + __i915_buddy_free(mm, block); + +err_free: + i915_buddy_free_list(mm, &allocated); + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_buddy.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h new file mode 100644 index 000000000000..37f8c42071d1 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_BUDDY_H__ +#define __I915_BUDDY_H__ + +#include +#include +#include + +struct i915_buddy_block { +#define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define I915_BUDDY_ALLOCATED (1 << 10) +#define I915_BUDDY_FREE (2 << 10) +#define I915_BUDDY_SPLIT (3 << 10) +/* Free to be used, if needed in the future */ +#define I915_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6) +#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) + u64 header; + + struct i915_buddy_block *left; + struct i915_buddy_block *right; + struct i915_buddy_block *parent; + + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through i915_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * i915_buddy_free* ownership is given back to the mm. + */ + struct list_head link; + struct list_head tmp_link; +}; + +/* Order-zero must be at least PAGE_SIZE */ +#define I915_BUDDY_MAX_ORDER (63 - PAGE_SHIFT) + +/* + * Binary Buddy System. + * + * Locking should be handled by the user, a simple mutex around + * i915_buddy_alloc* and i915_buddy_free* should suffice. + */ +struct i915_buddy_mm { + struct kmem_cache *slab_blocks; + /* Maintain a free list for each order. */ + struct list_head *free_list; + + /* + * Maintain explicit binary tree(s) to track the allocation of the + * address space. This gives us a simple way of finding a buddy block + * and performing the potentially recursive merge step when freeing a + * block. Nodes are either allocated or free, in which case they will + * also exist on the respective free list. + */ + struct i915_buddy_block **roots; + + /* + * Anything from here is public, and remains static for the lifetime of + * the mm. Everything above is considered do-not-touch. 
+ */ + unsigned int n_roots; + unsigned int max_order; + + /* Must be at least PAGE_SIZE */ + u64 chunk_size; + u64 size; +}; + +static inline u64 +i915_buddy_block_offset(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +i915_buddy_block_order(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_ORDER; +} + +static inline unsigned int +i915_buddy_block_state(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_STATE; +} + +static inline bool +i915_buddy_block_is_allocated(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_ALLOCATED; +} + +static inline bool +i915_buddy_block_is_free(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_FREE; +} + +static inline bool +i915_buddy_block_is_split(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_SPLIT; +} + +static inline u64 +i915_buddy_block_size(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + return mm->chunk_size << i915_buddy_block_order(block); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size); + +void i915_buddy_fini(struct i915_buddy_mm *mm); + +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order); + +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size); + +void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); + +#endif diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c new file mode 100644 index 000000000000..fc7ad5c035b8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include +#include + +#include "i915_ttm_buddy_manager.h" + +#include "i915_buddy.h" +#include "i915_gem.h" + +struct i915_ttm_buddy_manager { + struct ttm_resource_manager manager; + struct i915_buddy_mm mm; + struct list_head reserved; + struct mutex lock; +}; + +static struct i915_ttm_buddy_manager * +to_buddy_manager(struct ttm_resource_manager *man) +{ + return container_of(man, struct i915_ttm_buddy_manager, manager); +} + +static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_ttm_buddy_resource *bman_res; + struct i915_buddy_mm *mm = &bman->mm; + unsigned long n_pages; + unsigned int min_order; + u64 min_page_size; + u64 size; + int err; + + GEM_BUG_ON(place->fpfn || place->lpfn); + + bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL); + if (!bman_res) + return -ENOMEM; + + ttm_resource_init(bo, place, &bman_res->base); + INIT_LIST_HEAD(&bman_res->blocks); + bman_res->mm = mm; + + GEM_BUG_ON(!bman_res->base.num_pages); + size = bman_res->base.num_pages << PAGE_SHIFT; + + min_page_size = bo->page_alignment << PAGE_SHIFT; + GEM_BUG_ON(min_page_size < mm->chunk_size); + min_order = ilog2(min_page_size) - ilog2(mm->chunk_size); + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mm->chunk_size); + } + + if (size > mm->size) { + err = -E2BIG; + goto err_free_res; + } + + 
n_pages = size >> ilog2(mm->chunk_size); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mm->max_order); + GEM_BUG_ON(order < min_order); + + do { + mutex_lock(&bman->lock); + block = i915_buddy_alloc(mm, order); + mutex_unlock(&bman->lock); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) { + err = -ENOSPC; + goto err_free_blocks; + } + } while (1); + + n_pages -= BIT(order); + + list_add_tail(&block->link, &bman_res->blocks); + + if (!n_pages) + break; + } while (1); + + *res = &bman_res->base; + return 0; + +err_free_blocks: + mutex_lock(&bman->lock); + i915_buddy_free_list(mm, &bman_res->blocks); + mutex_unlock(&bman->lock); +err_free_res: + kfree(bman_res); + return err; +} + +static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + + mutex_lock(&bman->lock); + i915_buddy_free_list(&bman->mm, &bman_res->blocks); + mutex_unlock(&bman->lock); + + kfree(bman_res); +} + +static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { + .alloc = i915_ttm_buddy_man_alloc, + .free = i915_ttm_buddy_man_free, +}; + + +/** + * i915_ttm_buddy_man_init - Setup buddy allocator based ttm manager + * @bdev: The ttm device + * @type: Memory type we want to manage + * @use_tt: Set use_tt for the manager + * @size: The size in bytes to manage + * @chunk_size: The minimum page size in bytes for our allocations i.e + * order-zero + * + * Note that the starting address is assumed to be zero here, since this + * simplifies keeping the property where allocated blocks having natural + * power-of-two alignment. So long as the real starting address is some large + * power-of-two, or naturally start from zero, then this should be fine. Also + * the &i915_ttm_buddy_man_reserve interface can be used to preserve alignment + * if say there is some unusable range from the start of the region. We can + * revisit this in the future and make the interface accept an actual starting + * offset and let it take care of the rest. + * + * Note that if the @size is not aligned to the @chunk_size then we perform the + * required rounding to get the usable size. The final size in pages can be + * taken from &ttm_resource_manager.size. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_buddy_man_init(struct ttm_device *bdev, + unsigned int type, bool use_tt, + u64 size, u64 chunk_size) +{ + struct ttm_resource_manager *man; + struct i915_ttm_buddy_manager *bman; + int err; + + bman = kzalloc(sizeof(*bman), GFP_KERNEL); + if (!bman) + return -ENOMEM; + + err = i915_buddy_init(&bman->mm, size, chunk_size); + if (err) + goto err_free_bman; + + mutex_init(&bman->lock); + INIT_LIST_HEAD(&bman->reserved); + + man = &bman->manager; + man->use_tt = use_tt; + man->func = &i915_ttm_buddy_manager_func; + ttm_resource_manager_init(man, bman->mm.size >> PAGE_SHIFT); + + ttm_resource_manager_set_used(man, true); + ttm_set_driver_manager(bdev, type, man); + + return 0; + +err_free_bman: + kfree(bman); + return err; +} + +/** + * i915_ttm_buddy_man_fini - Destroy the buddy allocator ttm manager + * @bdev: The ttm device + * @type: Memory type we want to manage + * + * Note that if we reserved anything with &i915_ttm_buddy_man_reserve, this will + * also be freed for us here. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) +{ + struct ttm_resource_manager *man = ttm_manager_type(bdev, type); + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_mm *mm = &bman->mm; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + + ttm_set_driver_manager(bdev, type, NULL); + + mutex_lock(&bman->lock); + i915_buddy_free_list(mm, &bman->reserved); + i915_buddy_fini(mm); + mutex_unlock(&bman->lock); + + ttm_resource_manager_cleanup(man); + kfree(bman); + + return 0; +} + +/** + * i915_ttm_buddy_man_reserve - Reserve address range + * @man: The buddy allocator ttm manager + * @start: The offset in bytes, where the region start is assumed to be zero + * @size: The size in bytes + * + * Note that the starting address for the region is always assumed to be zero. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, + u64 start, u64 size) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_mm *mm = &bman->mm; + int ret; + + mutex_lock(&bman->lock); + ret = i915_buddy_alloc_range(mm, &bman->reserved, start, size); + mutex_unlock(&bman->lock); + + return ret; +} + diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h new file mode 100644 index 000000000000..26026213e20a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_TTM_BUDDY_MANAGER_H__ +#define __I915_TTM_BUDDY_MANAGER_H__ + +#include +#include + +#include + +struct ttm_device; +struct ttm_resource_manager; +struct i915_buddy_mm; + +/** + * struct i915_ttm_buddy_resource + * + * @base: struct ttm_resource base class we extend + * @blocks: the list of struct i915_buddy_block for this resource/allocation + * @mm: the struct i915_buddy_mm for this resource + * + * Extends the struct ttm_resource to manage an address space allocation with + * one or more struct i915_buddy_block. + */ +struct i915_ttm_buddy_resource { + struct ttm_resource base; + struct list_head blocks; + struct i915_buddy_mm *mm; +}; + +/** + * to_ttm_buddy_resource + * + * @res: the resource to upcast + * + * Upcast the struct ttm_resource object into a struct i915_ttm_buddy_resource. 
+ */ +static inline struct i915_ttm_buddy_resource * +to_ttm_buddy_resource(struct ttm_resource *res) +{ + return container_of(res, struct i915_ttm_buddy_resource, base); +} + +int i915_ttm_buddy_man_init(struct ttm_device *bdev, + unsigned type, bool use_tt, + u64 size, u64 chunk_size); +int i915_ttm_buddy_man_fini(struct ttm_device *bdev, + unsigned int type); + +int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, + u64 start, u64 size); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c new file mode 100644 index 000000000000..f0f5c4df8dbc --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "../i915_selftest.h" +#include "i915_random.h" + +static void __igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + bool buddy) +{ + pr_err("block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%s buddy=%s\n", + block->header, + i915_buddy_block_state(block), + i915_buddy_block_order(block), + i915_buddy_block_offset(block), + i915_buddy_block_size(mm, block), + yesno(!block->parent), + yesno(buddy)); +} + +static void igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + + __igt_dump_block(mm, block, false); + + buddy = get_buddy(block); + if (buddy) + __igt_dump_block(mm, buddy, true); +} + +static int igt_check_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + unsigned int block_state; + u64 block_size; + u64 offset; + int err = 0; + + block_state = i915_buddy_block_state(block); + + if (block_state != I915_BUDDY_ALLOCATED && + block_state != I915_BUDDY_FREE && + block_state != I915_BUDDY_SPLIT) { + pr_err("block state mismatch\n"); + err = -EINVAL; + } + + block_size = i915_buddy_block_size(mm, block); + offset = i915_buddy_block_offset(block); + + if (block_size < mm->chunk_size) { + pr_err("block size smaller than min size\n"); + err = -EINVAL; + } + + if (!is_power_of_2(block_size)) { + pr_err("block size not power of two\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(block_size, mm->chunk_size)) { + pr_err("block size not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, mm->chunk_size)) { + pr_err("block offset not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, block_size)) { + pr_err("block offset not aligned to block size\n"); + err = -EINVAL; + } + + buddy = get_buddy(block); + + if (!buddy && block->parent) { + pr_err("buddy has gone fishing\n"); + err = -EINVAL; + } + + if (buddy) { + if (i915_buddy_block_offset(buddy) != (offset ^ block_size)) { + pr_err("buddy has wrong offset\n"); + err = -EINVAL; + } + + if (i915_buddy_block_size(mm, buddy) != block_size) { + pr_err("buddy size mismatch\n"); + err = -EINVAL; + } + + if (i915_buddy_block_state(buddy) == block_state && + block_state == I915_BUDDY_FREE) { + pr_err("block and its buddy are free\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_check_blocks(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 expected_size, + bool is_contiguous) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *prev; + u64 total; + int err = 0; + + block = NULL; + prev = NULL; + total = 0; + + list_for_each_entry(block, blocks, link) { + err = igt_check_block(mm, block); + + if 
(!i915_buddy_block_is_allocated(block)) { + pr_err("block not allocated\n"), + err = -EINVAL; + } + + if (is_contiguous && prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(block); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("block offset mismatch\n"); + err = -EINVAL; + } + } + + if (err) + break; + + total += i915_buddy_block_size(mm, block); + prev = block; + } + + if (!err) { + if (total != expected_size) { + pr_err("size mismatch, expected=%llx, found=%llx\n", + expected_size, total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev block, dump:\n"); + igt_dump_block(mm, prev); + } + + if (block) { + pr_err("bad block, dump:\n"); + igt_dump_block(mm, block); + } + + return err; +} + +static int igt_check_mm(struct i915_buddy_mm *mm) +{ + struct i915_buddy_block *root; + struct i915_buddy_block *prev; + unsigned int i; + u64 total; + int err = 0; + + if (!mm->n_roots) { + pr_err("n_roots is zero\n"); + return -EINVAL; + } + + if (mm->n_roots != hweight64(mm->size)) { + pr_err("n_roots mismatch, n_roots=%u, expected=%lu\n", + mm->n_roots, hweight64(mm->size)); + return -EINVAL; + } + + root = NULL; + prev = NULL; + total = 0; + + for (i = 0; i < mm->n_roots; ++i) { + struct i915_buddy_block *block; + unsigned int order; + + root = mm->roots[i]; + if (!root) { + pr_err("root(%u) is NULL\n", i); + err = -EINVAL; + break; + } + + err = igt_check_block(mm, root); + + if (!i915_buddy_block_is_free(root)) { + pr_err("root not free\n"); + err = -EINVAL; + } + + order = i915_buddy_block_order(root); + + if (!i) { + if (order != mm->max_order) { + pr_err("max order root missing\n"); + err = -EINVAL; + } + } + + if (prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(root); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("root offset mismatch\n"); + err = -EINVAL; + } + } + + block = list_first_entry_or_null(&mm->free_list[order], + struct i915_buddy_block, + link); + if (block != root) { + pr_err("root mismatch at order=%u\n", order); + err = -EINVAL; + } + + if (err) + break; + + prev = root; + total += i915_buddy_block_size(mm, root); + } + + if (!err) { + if (total != mm->size) { + pr_err("expected mm size=%llx, found=%llx\n", mm->size, + total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev root(%u), dump:\n", i - 1); + igt_dump_block(mm, prev); + } + + if (root) { + pr_err("bad root(%u), dump:\n", i); + igt_dump_block(mm, root); + } + + return err; +} + +static void igt_mm_config(u64 *size, u64 *chunk_size) +{ + I915_RND_STATE(prng); + u32 s, ms; + + /* Nothing fancy, just try to get an interesting bit pattern */ + + prandom_seed_state(&prng, i915_selftest.random_seed); + + /* Let size be a random number of pages up to 8 GB (2M pages) */ + s = 1 + i915_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); + /* Let the chunk size be a random power of 2 less than size */ + ms = BIT(i915_prandom_u32_max_state(ilog2(s), &prng)); + /* Round size down to the chunk size */ + s &= -ms; + + /* Convert from pages to bytes */ + *chunk_size = (u64)ms << 12; + *size = (u64)s << 12; +} + +static int igt_buddy_alloc_smoke(void *arg) +{ + struct i915_buddy_mm mm; + IGT_TIMEOUT(end_time); + I915_RND_STATE(prng); + u64 chunk_size; + u64 
mm_size; + int *order; + int err, i; + + igt_mm_config(&mm_size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", mm_size, chunk_size); + + err = i915_buddy_init(&mm, mm_size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + order = i915_random_order(mm.max_order + 1, &prng); + if (!order) + goto out_fini; + + for (i = 0; i <= mm.max_order; ++i) { + struct i915_buddy_block *block; + int max_order = order[i]; + bool timeout = false; + LIST_HEAD(blocks); + int order; + u64 total; + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort\n"); + break; + } + + pr_info("filling from max_order=%u\n", max_order); + + order = max_order; + total = 0; + + do { +retry: + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + if (err == -ENOMEM) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + } else { + if (order--) { + err = 0; + goto retry; + } + + pr_err("buddy_alloc with order=%d failed(%d)\n", + order, err); + } + + break; + } + + list_add_tail(&block->link, &blocks); + + if (i915_buddy_block_order(block) != order) { + pr_err("buddy_alloc order mismatch\n"); + err = -EINVAL; + break; + } + + total += i915_buddy_block_size(&mm, block); + + if (__igt_timeout(end_time, NULL)) { + timeout = true; + break; + } + } while (total < mm.size); + + if (!err) + err = igt_check_blocks(&mm, &blocks, total, false); + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + + if (err || timeout) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + kfree(order); +out_fini: + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_pessimistic(void *arg) +{ + const unsigned int max_order = 16; + struct i915_buddy_block *block, *bn; + struct i915_buddy_mm mm; + unsigned int order; + LIST_HEAD(blocks); + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. + */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order < max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* And now the last remaining block available */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM on final alloc\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + + /* Should be completely full! 
*/ + for (order = max_order; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + block = list_last_entry(&blocks, typeof(*block), link); + list_del(&block->link); + i915_buddy_free(&mm, block); + + /* As we free in increasing size, we make available larger blocks */ + order = 1; + list_for_each_entry_safe(block, bn, &blocks, link) { + list_del(&block->link); + i915_buddy_free(&mm, block); + + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + order++; + } + + /* To confirm, now the whole mm should be available */ + block = i915_buddy_alloc(&mm, max_order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + max_order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_optimistic(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + int order; + int err; + + /* + * Create a mm with one block of each order available, and + * try to allocate them all. + */ + + err = i915_buddy_init(&mm, + PAGE_SIZE * ((1 << (max_order + 1)) - 1), + PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* Should be completely full! */ + block = i915_buddy_alloc(&mm, 0); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded, it should be full!"); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_pathological(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + LIST_HEAD(holes); + int order, top; + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. Free the largest block, then whittle down again. + * Eventually we will have a fully 50% fragmented mm. 
+ */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (top = max_order; top; top--) { + /* Make room by freeing the largest allocated block */ + block = list_first_entry_or_null(&blocks, typeof(*block), link); + if (block) { + list_del(&block->link); + i915_buddy_free(&mm, block); + } + + for (order = top; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d, top=%d\n", + order, top); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + } + + /* There should be one final page for this sub-allocation */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM for hole\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &holes); + + block = i915_buddy_alloc(&mm, top); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", + top, max_order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + i915_buddy_free_list(&mm, &holes); + + /* Nothing larger than blocks of chunk_size now available */ + for (order = 1; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + +err: + list_splice_tail(&holes, &blocks); + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_range(void *arg) +{ + struct i915_buddy_mm mm; + unsigned long page_num; + LIST_HEAD(blocks); + u64 chunk_size; + u64 offset; + u64 size; + u64 rem; + int err; + + igt_mm_config(&size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", size, chunk_size); + + err = i915_buddy_init(&mm, size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort, abort, abort!\n"); + goto err_fini; + } + + rem = mm.size; + offset = 0; + + for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { + struct i915_buddy_block *block; + LIST_HEAD(tmp); + + size = min(page_num * mm.chunk_size, rem); + + err = i915_buddy_alloc_range(&mm, &tmp, offset, size); + if (err) { + if (err == -ENOMEM) { + pr_info("alloc_range hit -ENOMEM with size=%llx\n", + size); + } else { + pr_err("alloc_range with offset=%llx, size=%llx failed(%d)\n", + offset, size, err); + } + + break; + } + + block = list_first_entry_or_null(&tmp, + struct i915_buddy_block, + link); + if (!block) { + pr_err("alloc_range has no blocks\n"); + err = -EINVAL; + break; + } + + if (i915_buddy_block_offset(block) != offset) { + pr_err("alloc_range start offset mismatch, found=%llx, expected=%llx\n", + i915_buddy_block_offset(block), offset); + err = -EINVAL; + } + + if (!err) + err = igt_check_blocks(&mm, &tmp, size, true); + + list_splice_tail(&tmp, &blocks); + + if (err) + break; + + offset += size; + + rem -= size; + if (!rem) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + +err_fini: + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_limit(void *arg) +{ + 
struct i915_buddy_block *block; + struct i915_buddy_mm mm; + const u64 size = U64_MAX; + int err; + + err = i915_buddy_init(&mm, size, PAGE_SIZE); + if (err) + return err; + + if (mm.max_order != I915_BUDDY_MAX_ORDER) { + pr_err("mm.max_order(%d) != %d\n", + mm.max_order, I915_BUDDY_MAX_ORDER); + err = -EINVAL; + goto out_fini; + } + + block = i915_buddy_alloc(&mm, mm.max_order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto out_fini; + } + + if (i915_buddy_block_order(block) != mm.max_order) { + pr_err("block order(%d) != %d\n", + i915_buddy_block_order(block), mm.max_order); + err = -EINVAL; + goto out_free; + } + + if (i915_buddy_block_size(&mm, block) != + BIT_ULL(mm.max_order) * PAGE_SIZE) { + pr_err("block size(%llu) != %llu\n", + i915_buddy_block_size(&mm, block), + BIT_ULL(mm.max_order) * PAGE_SIZE); + err = -EINVAL; + goto out_free; + } + +out_free: + i915_buddy_free(&mm, block); +out_fini: + i915_buddy_fini(&mm); + return err; +} + +int i915_buddy_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_buddy_alloc_pessimistic), + SUBTEST(igt_buddy_alloc_optimistic), + SUBTEST(igt_buddy_alloc_pathological), + SUBTEST(igt_buddy_alloc_smoke), + SUBTEST(igt_buddy_alloc_range), + SUBTEST(igt_buddy_alloc_limit), + }; + + return i915_subtests(tests, NULL); +} -- cgit v1.2.3 From f701b16d4cc535d24facdfdd21dc97a3691e5576 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:24:56 +0100 Subject: drm/i915/ttm: add i915_sg_from_buddy_resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to be able to build an sg table from our list of buddy blocks, so that we can later plug this into our ttm backend, and replace our use of the range manager. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_scatterlist.c | 80 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_scatterlist.h | 5 +++ 2 files changed, 85 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 69e9e6c3135e..4a6712dca838 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -6,6 +6,9 @@ #include "i915_scatterlist.h" +#include "i915_buddy.h" +#include "i915_ttm_buddy_manager.h" + #include #include @@ -104,6 +107,83 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, return st; } +/** + * i915_sg_from_buddy_resource - Create an sg_table from a struct + * i915_buddy_block list + * @res: The struct i915_ttm_buddy_resource. + * @region_start: An offset to add to the dma addresses of the sg list. + * + * Create a struct sg_table, initializing it from struct i915_buddy_block list, + * taking a maximum segment length into account, splitting into segments + * if necessary. + * + * Return: A pointer to a kmalloced struct sg_table on success, negative + * error code cast to an error pointer on failure. 
+ */ +struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, + u64 region_start) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + const u64 size = res->num_pages << PAGE_SHIFT; + const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct i915_buddy_mm *mm = bman_res->mm; + struct list_head *blocks = &bman_res->blocks; + struct i915_buddy_block *block; + struct scatterlist *sg; + struct sg_table *st; + resource_size_t prev_end; + + GEM_BUG_ON(list_empty(blocks)); + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + st->nents = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, i915_buddy_block_size(mm, block)); + offset = i915_buddy_block_offset(block); + + while (block_size) { + u64 len; + + if (offset != prev_end || sg->length >= max_segment) { + if (st->nents) + sg = __sg_next(sg); + + sg_dma_address(sg) = region_start + offset; + sg_dma_len(sg) = 0; + sg->length = 0; + st->nents++; + } + + len = min(block_size, max_segment - sg->length); + sg->length += len; + sg_dma_len(sg) += len; + + offset += len; + block_size -= len; + + prev_end = offset; + } + } + + sg_mark_end(sg); + i915_sg_trim(st); + + return st; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #endif diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 5acca45ea981..b8bd5925b03f 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -14,6 +14,7 @@ #include "i915_gem.h" struct drm_mm_node; +struct ttm_resource; /* * Optimised SGL iterator for GEM objects @@ -145,4 +146,8 @@ bool i915_sg_trim(struct sg_table *orig_st); struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, u64 region_start); + +struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, + u64 region_start); + #endif -- cgit v1.2.3 From 38f28c0695c0413b701f67105bff2573c667492a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 16 Jun 2021 16:24:57 +0100 Subject: drm/i915/ttm: Calculate the object placement at get_pages time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of relying on a static placement, calculate at get_pages() time. This should work for LMEM regions and system for now. For stolen we need to take preallocated range into account. That will if needed be added later. 
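In short, get_pages() now builds the TTM placement on the fly from the object's allowed memory regions and lets TTM migrate the object accordingly. A rough sketch of that flow, using the names introduced in the diff below (illustrative only, not an excerpt of the patch):

	struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS];
	struct ttm_placement placement;

	/* requested: first allowed region (or obj->mm.region),
	 * busy[]:    every allowed region, used as the fallback list. */
	i915_ttm_placement_from_obj(obj, &requested, busy, &placement);

	/* TTM then moves the object to whichever placement it can satisfy. */
	ret = ttm_bo_validate(bo, &placement, &ctx);
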
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 93 ++++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_region_ttm.c | 8 ++- drivers/gpu/drm/i915/intel_region_ttm.h | 2 + 3 files changed, 77 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index cdf36d3cf02a..d7595688e182 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -24,6 +24,11 @@ #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 +/* + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs + */ +#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN + /** * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. @@ -42,36 +47,71 @@ struct i915_ttm_tt { struct sg_table *cached_st; }; -static const struct ttm_place lmem0_sys_placement_flags[] = { - { - .fpfn = 0, - .lpfn = 0, - .mem_type = I915_PL_LMEM0, - .flags = 0, - }, { - .fpfn = 0, - .lpfn = 0, - .mem_type = I915_PL_SYSTEM, - .flags = 0, - } -}; - -static struct ttm_placement i915_lmem0_placement = { - .num_placement = 1, - .placement = &lmem0_sys_placement_flags[0], - .num_busy_placement = 1, - .busy_placement = &lmem0_sys_placement_flags[0], +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, }; static struct ttm_placement i915_sys_placement = { .num_placement = 1, - .placement = &lmem0_sys_placement_flags[1], + .placement = &sys_placement_flags, .num_busy_placement = 1, - .busy_placement = &lmem0_sys_placement_flags[1], + .busy_placement = &sys_placement_flags, }; static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +static enum ttm_caching +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) +{ + /* + * Objects only allowed in system get cached cpu-mappings. + * Other objects get WC mapping for now. Even if in system. + */ + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && + obj->mm.n_placements <= 1) + return ttm_cached; + + return ttm_write_combined; +} + +static void +i915_ttm_place_from_region(const struct intel_memory_region *mr, + struct ttm_place *place) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = intel_region_to_ttm_type(mr); +} + +static void +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, + struct ttm_place *requested, + struct ttm_place *busy, + struct ttm_placement *placement) +{ + unsigned int num_allowed = obj->mm.n_placements; + unsigned int i; + + placement->num_placement = 1; + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : + obj->mm.region, requested); + + /* Cache this on object? 
*/ + placement->num_busy_placement = num_allowed; + for (i = 0; i < placement->num_busy_placement; ++i) + i915_ttm_place_from_region(obj->mm.placements[i], busy + i); + + if (num_allowed == 0) { + *busy = *requested; + placement->num_busy_placement = 1; + } + + placement->placement = requested; + placement->busy_placement = busy; +} + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { @@ -89,7 +129,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, man->use_tt) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, ttm_write_combined); + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, + i915_ttm_select_tt_caching(obj)); if (ret) { kfree(i915_tt); return NULL; @@ -416,10 +457,15 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) .no_wait_gpu = false, }; struct sg_table *st; + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; int ret; + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + /* Move to the requested placement. */ - ret = ttm_bo_validate(bo, &i915_lmem0_placement, &ctx); + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + ret = ttm_bo_validate(bo, &placement, &ctx); if (ret) return ret == -ENOSPC ? -ENXIO : ret; @@ -625,7 +671,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); - bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : ttm_bo_type_kernel; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 27fe0668d094..5a664f6cc93f 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -50,12 +50,16 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) * driver-private types for now, reserving TTM_PL_VRAM for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. 
*/ -static int intel_region_to_ttm_type(struct intel_memory_region *mem) +int intel_region_to_ttm_type(const struct intel_memory_region *mem) { int type; GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && - mem->type != INTEL_MEMORY_MOCK); + mem->type != INTEL_MEMORY_MOCK && + mem->type != INTEL_MEMORY_SYSTEM); + + if (mem->type == INTEL_MEMORY_SYSTEM) + return TTM_PL_SYSTEM; type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index e8cf830fda6f..649491844e79 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -28,6 +28,8 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, void intel_region_ttm_node_free(struct intel_memory_region *mem, struct ttm_resource *node); +int intel_region_to_ttm_type(const struct intel_memory_region *mem); + struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST -- cgit v1.2.3 From beb6a22911ff6f7e933670b43e4bda5be56bd8f9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:24:58 +0100 Subject: drm/i915/ttm: pass along the I915_BO_ALLOC_CONTIGUOUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we just ignore the I915_BO_ALLOC_CONTIGUOUS flag, which is fine since everything is already contiguous with the ttm range manager. However in the next patch we want to switch over to the ttm buddy manager, where allocations are by default not contiguous. v2(Thomas): - Forward ALLOC_CONTIG for all regions Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-4-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index d7595688e182..90d342e3df24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -79,10 +79,14 @@ i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) static void i915_ttm_place_from_region(const struct intel_memory_region *mr, - struct ttm_place *place) + struct ttm_place *place, + unsigned int flags) { memset(place, 0, sizeof(*place)); place->mem_type = intel_region_to_ttm_type(mr); + + if (flags & I915_BO_ALLOC_CONTIGUOUS) + place->flags = TTM_PL_FLAG_CONTIGUOUS; } static void @@ -92,16 +96,17 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, struct ttm_placement *placement) { unsigned int num_allowed = obj->mm.n_placements; + unsigned int flags = obj->flags; unsigned int i; placement->num_placement = 1; i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : - obj->mm.region, requested); + obj->mm.region, requested, flags); /* Cache this on object? 
*/ placement->num_busy_placement = num_allowed; for (i = 0; i < placement->num_busy_placement; ++i) - i915_ttm_place_from_region(obj->mm.placements[i], busy + i); + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); if (num_allowed == 0) { *busy = *requested; -- cgit v1.2.3 From 687c7d0fcf8014a006416d7dc7474a101a85bf00 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:24:59 +0100 Subject: drm/i915/ttm: remove node usage in our naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that ttm_resource_manager just returns a generic ttm_resource we don't need to reference the mm_node stuff anymore which mostly only makes sense for drm_mm_node. In the next few patches we want switch over to the ttm_buddy_man which is just another type of ttm_resource so reflect that in the naming. Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-5-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 5 ++-- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/intel_region_ttm.c | 30 ++++++++++++------------ drivers/gpu/drm/i915/intel_region_ttm.h | 14 +++++------ drivers/gpu/drm/i915/selftests/mock_region.c | 16 ++++++------- 5 files changed, 34 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2a23b77424b3..3a2d9ecf8e03 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -265,9 +265,10 @@ struct drm_i915_gem_object { struct intel_memory_region *region; /** - * Memory manager node allocated for this object. + * Memory manager resource allocated for this object. Only + * needed for the mock region. */ - void *st_mm_node; + struct ttm_resource *res; /** * Element within memory_region->objects or region->purgeable diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 90d342e3df24..20119ea9c418 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -327,7 +327,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, if (man->use_tt) return i915_ttm_tt_get_st(bo->ttm); - return intel_region_ttm_node_to_st(obj->mm.region, res); + return intel_region_ttm_resource_to_st(obj->mm.region, res); } static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 5a664f6cc93f..f9d616544728 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -68,7 +68,7 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) } static struct ttm_resource * -intel_region_ttm_node_reserve(struct intel_memory_region *mem, +intel_region_ttm_resource_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size) { @@ -100,12 +100,12 @@ intel_region_ttm_node_reserve(struct intel_memory_region *mem, } /** - * intel_region_ttm_node_free - Free a node allocated from a resource manager - * @mem: The region the node was allocated from. - * @node: The opaque node representing an allocation. + * intel_region_ttm_resource_free - Free a resource allocated from a resource manager + * @mem: The region the resource was allocated from. 
+ * @res: The opaque resource representing an allocation. */ -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *res) +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res) { struct ttm_resource_manager *man = mem->region_private; @@ -113,8 +113,8 @@ void intel_region_ttm_node_free(struct intel_memory_region *mem, } static const struct intel_memory_region_private_ops priv_ops = { - .reserve = intel_region_ttm_node_reserve, - .free = intel_region_ttm_node_free, + .reserve = intel_region_ttm_resource_reserve, + .free = intel_region_ttm_resource_free, }; int intel_region_ttm_init(struct intel_memory_region *mem) @@ -157,10 +157,10 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) } /** - * intel_region_ttm_node_to_st - Convert an opaque TTM resource manager node + * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource * to an sg_table. * @mem: The memory region. - * @node: The resource manager node obtained from the TTM resource manager. + * @res: The resource manager resource obtained from the TTM resource manager. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -168,8 +168,8 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) * * Return: A malloced sg_table on success, an error pointer on failure. */ -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res) +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res) { struct ttm_range_mgr_node *range_node = container_of(res, typeof(*range_node), base); @@ -196,9 +196,9 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, * Return: A valid pointer on success, an error pointer on failure. 
*/ struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { struct ttm_resource_manager *man = mem->region_private; struct ttm_place place = {}; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index 649491844e79..6f44075920f2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -22,11 +22,11 @@ int intel_region_ttm_init(struct intel_memory_region *mem); void intel_region_ttm_fini(struct intel_memory_region *mem); -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res); +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res); -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *node); +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res); int intel_region_to_ttm_type(const struct intel_memory_region *mem); @@ -34,8 +34,8 @@ struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); #endif #endif /* _INTEL_REGION_TTM_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index eafc5a04975c..6120d43fe504 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -16,7 +16,7 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); sg_free_table(pages); kfree(pages); } @@ -30,15 +30,15 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) flags |= I915_ALLOC_CONTIGUOUS; - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); + obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, + obj->base.size, + flags); + if (IS_ERR(obj->mm.res)) + return PTR_ERR(obj->mm.res); - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); + pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); return PTR_ERR(pages); } -- cgit v1.2.3 From d53ec322dc7de32a59bf1c2a56b93e90fc2f1c28 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:25:00 +0100 Subject: drm/i915/ttm: switch over to ttm_buddy_man MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move back to the buddy allocator for managing device local memory, and restore the lost mock selftests. Keep around the range manager related bits, since we likely need this for managing stolen at some point. For stolen we also don't need to reserve anything so no need to support a generic reserve interface. 
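Combined with the earlier i915_sg_from_buddy_resource() patch, the local-memory allocation path now looks roughly like the following (a sketch assembled from the hunks in this series, not part of the diff itself):

	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
	struct sg_table *st;

	/* The buddy manager hands back a list of i915_buddy_block entries
	 * backed by the region's i915_buddy_mm... */
	GEM_BUG_ON(list_empty(&bman_res->blocks));

	/* ...which the gem backends convert into an sg_table as before. */
	st = i915_sg_from_buddy_resource(res, mem->region.start);
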
v2(Thomas): - bo->page_alignment is in page units, not bytes Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-6-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 20 +-- drivers/gpu/drm/i915/intel_memory_region.c | 55 +------ drivers/gpu/drm/i915/intel_memory_region.h | 17 --- drivers/gpu/drm/i915/intel_region_ttm.c | 122 ++++++--------- .../gpu/drm/i915/selftests/intel_memory_region.c | 170 ++++++++++++++------- drivers/gpu/drm/i915/selftests/mock_region.c | 12 +- 6 files changed, 180 insertions(+), 216 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 20119ea9c418..b8739f3d3462 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -175,11 +175,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); /* Will do for now. Our pinned objects are still on TTM's LRU lists */ - if (!i915_gem_object_evictable(obj)) - return false; - - /* This isn't valid with a buddy allocator */ - return ttm_bo_eviction_valuable(bo, place); + return i915_gem_object_evictable(obj); } static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, @@ -654,20 +650,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; enum ttm_bo_type bo_type; - size_t alignment = 0; int ret; - /* Adjust alignment to GPU- and CPU huge page sizes. */ - - if (mem->is_range_manager) { - if (size >= SZ_1G) - alignment = SZ_1G >> PAGE_SHIFT; - else if (size >= SZ_2M) - alignment = SZ_2M >> PAGE_SHIFT; - else if (size >= SZ_64K) - alignment = SZ_64K >> PAGE_SHIFT; - } - drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); i915_gem_object_init_memory_region(obj, mem); @@ -688,7 +672,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size, - bo_type, &i915_sys_placement, alignment, + bo_type, &i915_sys_placement, 1, true, NULL, NULL, i915_ttm_bo_destroy); if (!ret) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 12fb5423fd5e..df59f884d37c 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -5,6 +5,7 @@ #include "intel_memory_region.h" #include "i915_drv.h" +#include "i915_ttm_buddy_manager.h" static const struct { u16 class; @@ -28,11 +29,6 @@ static const struct { }, }; -struct intel_region_reserve { - struct list_head link; - struct ttm_resource *res; -}; - struct intel_memory_region * intel_memory_region_lookup(struct drm_i915_private *i915, u16 class, u16 instance) @@ -63,27 +59,6 @@ intel_memory_region_by_type(struct drm_i915_private *i915, return NULL; } -/** - * intel_memory_region_unreserve - Unreserve all previously reserved - * ranges - * @mem: The region containing the reserved ranges. 
- */ -void intel_memory_region_unreserve(struct intel_memory_region *mem) -{ - struct intel_region_reserve *reserve, *next; - - if (!mem->priv_ops || !mem->priv_ops->free) - return; - - mutex_lock(&mem->mm_lock); - list_for_each_entry_safe(reserve, next, &mem->reserved, link) { - list_del(&reserve->link); - mem->priv_ops->free(mem, reserve->res); - kfree(reserve); - } - mutex_unlock(&mem->mm_lock); -} - /** * intel_memory_region_reserve - Reserve a memory range * @mem: The region for which we want to reserve a range. @@ -96,28 +71,11 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size) { - int ret; - struct intel_region_reserve *reserve; - - if (!mem->priv_ops || !mem->priv_ops->reserve) - return -EINVAL; - - reserve = kzalloc(sizeof(*reserve), GFP_KERNEL); - if (!reserve) - return -ENOMEM; + struct ttm_resource_manager *man = mem->region_private; - reserve->res = mem->priv_ops->reserve(mem, offset, size); - if (IS_ERR(reserve->res)) { - ret = PTR_ERR(reserve->res); - kfree(reserve); - return ret; - } - - mutex_lock(&mem->mm_lock); - list_add_tail(&reserve->link, &mem->reserved); - mutex_unlock(&mem->mm_lock); + GEM_BUG_ON(mem->is_range_manager); - return 0; + return i915_ttm_buddy_man_reserve(man, offset, size); } struct intel_memory_region * @@ -149,9 +107,6 @@ intel_memory_region_create(struct drm_i915_private *i915, mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); - INIT_LIST_HEAD(&mem->reserved); - - mutex_init(&mem->mm_lock); if (ops->init) { err = ops->init(mem); @@ -182,11 +137,9 @@ static void __intel_memory_region_destroy(struct kref *kref) struct intel_memory_region *mem = container_of(kref, typeof(*mem), kref); - intel_memory_region_unreserve(mem); if (mem->ops->release) mem->ops->release(mem); - mutex_destroy(&mem->mm_lock); mutex_destroy(&mem->objects.lock); kfree(mem); } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index c7e635d62e1a..b04fb22726d9 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -59,19 +59,10 @@ struct intel_memory_region_ops { unsigned int flags); }; -struct intel_memory_region_private_ops { - struct ttm_resource *(*reserve)(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size); - void (*free)(struct intel_memory_region *mem, - struct ttm_resource *res); -}; - struct intel_memory_region { struct drm_i915_private *i915; const struct intel_memory_region_ops *ops; - const struct intel_memory_region_private_ops *priv_ops; struct io_mapping iomap; struct resource region; @@ -79,8 +70,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct mutex mm_lock; - struct kref kref; resource_size_t io_start; @@ -94,8 +83,6 @@ struct intel_memory_region { char name[16]; bool private; /* not for userspace */ - struct list_head reserved; - dma_addr_t remap_addr; struct { @@ -103,8 +90,6 @@ struct intel_memory_region { struct list_head list; } objects; - size_t chunk_size; - unsigned int max_order; bool is_range_manager; void *region_private; @@ -138,8 +123,6 @@ __printf(2, 3) void intel_memory_region_set_name(struct intel_memory_region *mem, const char *fmt, ...); -void intel_memory_region_unreserve(struct intel_memory_region *mem); - int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c 
b/drivers/gpu/drm/i915/intel_region_ttm.c index f9d616544728..052253c81e98 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_scatterlist.h" +#include "i915_ttm_buddy_manager.h" #include "intel_region_ttm.h" @@ -67,72 +68,28 @@ int intel_region_to_ttm_type(const struct intel_memory_region *mem) return type; } -static struct ttm_resource * -intel_region_ttm_resource_reserve(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size) -{ - struct ttm_resource_manager *man = mem->region_private; - struct ttm_place place = {}; - struct ttm_buffer_object mock_bo = {}; - struct ttm_resource *res; - int ret; - - /* - * Having to use a mock_bo is unfortunate but stems from some - * drivers having private managers that insist to know what the - * allocate memory is intended for, using it to send private - * data to the manager. Also recently the bo has been used to send - * alignment info to the manager. Assume that apart from the latter, - * none of the managers we use will ever access the buffer object - * members, hoping we can pass the alignment info in the - * struct ttm_place in the future. - */ - - place.fpfn = offset >> PAGE_SHIFT; - place.lpfn = place.fpfn + (size >> PAGE_SHIFT); - mock_bo.base.size = size; - ret = man->func->alloc(man, &mock_bo, &place, &res); - if (ret == -ENOSPC) - ret = -ENXIO; - - return ret ? ERR_PTR(ret) : res; -} - /** - * intel_region_ttm_resource_free - Free a resource allocated from a resource manager - * @mem: The region the resource was allocated from. - * @res: The opaque resource representing an allocation. + * intel_region_ttm_init - Initialize a memory region for TTM. + * @mem: The region to initialize. + * + * This function initializes a suitable TTM resource manager for the + * region, and if it's a LMEM region type, attaches it to the TTM + * device. MOCK regions are NOT attached to the TTM device, since we don't + * have one for the mock selftests. + * + * Return: 0 on success, negative error code on failure. 
*/ -void intel_region_ttm_resource_free(struct intel_memory_region *mem, - struct ttm_resource *res) -{ - struct ttm_resource_manager *man = mem->region_private; - - man->func->free(man, res); -} - -static const struct intel_memory_region_private_ops priv_ops = { - .reserve = intel_region_ttm_resource_reserve, - .free = intel_region_ttm_resource_free, -}; - int intel_region_ttm_init(struct intel_memory_region *mem) { struct ttm_device *bdev = &mem->i915->bdev; int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = ttm_range_man_init(bdev, mem_type, false, - resource_size(&mem->region) >> PAGE_SHIFT); + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(&mem->region), PAGE_SIZE); if (ret) return ret; - mem->chunk_size = PAGE_SIZE; - mem->max_order = - get_order(rounddown_pow_of_two(resource_size(&mem->region))); - mem->is_range_manager = true; - mem->priv_ops = &priv_ops; mem->region_private = ttm_manager_type(bdev, mem_type); return 0; @@ -150,8 +107,8 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) { int ret; - ret = ttm_range_man_fini(&mem->i915->bdev, - intel_region_to_ttm_type(mem)); + ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, + intel_region_to_ttm_type(mem)); GEM_WARN_ON(ret); mem->region_private = NULL; } @@ -171,12 +128,15 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, struct ttm_resource *res) { - struct ttm_range_mgr_node *range_node = - container_of(res, typeof(*range_node), base); + if (mem->is_range_manager) { + struct ttm_range_mgr_node *range_node = + to_ttm_range_mgr_node(res); - GEM_WARN_ON(!mem->is_range_manager); - return i915_sg_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + return i915_sg_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); + } else { + return i915_sg_from_buddy_resource(res, mem->region.start); + } } #ifdef CONFIG_DRM_I915_SELFTEST @@ -206,25 +166,35 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret; - /* - * We ignore the flags for now since we're using the range - * manager and contigous and min page size would be fulfilled - * by default if size is min page size aligned. - */ mock_bo.base.size = size; - - if (mem->is_range_manager) { - if (size >= SZ_1G) - mock_bo.page_alignment = SZ_1G >> PAGE_SHIFT; - else if (size >= SZ_2M) - mock_bo.page_alignment = SZ_2M >> PAGE_SHIFT; - else if (size >= SZ_64K) - mock_bo.page_alignment = SZ_64K >> PAGE_SHIFT; - } + mock_bo.page_alignment = 1; + place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC) ret = -ENXIO; return ret ? ERR_PTR(ret) : res; } + #endif + +void intel_region_ttm_node_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} + +/** + * intel_region_ttm_resource_free - Free a resource allocated from a resource manager + * @mem: The region the resource was allocated from. + * @res: The opaque resource representing an allocation. 
+ */ +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index c85d516b85cd..118a66c29695 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -20,7 +20,9 @@ #include "gem/selftests/mock_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "i915_buddy.h" #include "i915_memcpy.h" +#include "i915_ttm_buddy_manager.h" #include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" @@ -57,10 +59,9 @@ static int igt_mock_fill(void *arg) LIST_HEAD(objects); int err = 0; - page_size = mem->chunk_size; + page_size = PAGE_SIZE; + max_pages = div64_u64(total, page_size); rem = total; -retry: - max_pages = div64_u64(rem, page_size); for_each_prime_number_from(page_num, 1, max_pages) { resource_size_t size = page_num * page_size; @@ -86,11 +87,6 @@ retry: err = 0; if (err == -ENXIO) { if (page_num * page_size <= rem) { - if (mem->is_range_manager && max_pages > 1) { - max_pages >>= 1; - goto retry; - } - pr_err("%s failed, space still left in region\n", __func__); err = -EINVAL; @@ -157,6 +153,7 @@ static bool is_contiguous(struct drm_i915_gem_object *obj) static int igt_mock_reserve(void *arg) { struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; resource_size_t avail = resource_size(&mem->region); struct drm_i915_gem_object *obj; const u32 chunk_size = SZ_32M; @@ -166,16 +163,18 @@ static int igt_mock_reserve(void *arg) LIST_HEAD(objects); int err = 0; - if (!list_empty(&mem->reserved)) { - pr_err("%s region reserved list is not empty\n", __func__); - return -EINVAL; - } - count = avail / chunk_size; order = i915_random_order(count, &prng); if (!order) return 0; + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_close; + } + /* Reserve a bunch of ranges within the region */ for (i = 0; i < count; ++i) { u64 start = order[i] * chunk_size; @@ -205,18 +204,12 @@ static int igt_mock_reserve(void *arg) do { u32 size = i915_prandom_u32_max_state(cur_avail, &prng); -retry: size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { - if (PTR_ERR(obj) == -ENXIO) { - if (mem->is_range_manager && - size > mem->chunk_size) { - size >>= 1; - goto retry; - } + if (PTR_ERR(obj) == -ENXIO) break; - } + err = PTR_ERR(obj); goto out_close; } @@ -232,7 +225,7 @@ retry: out_close: kfree(order); close_objects(mem, &objects); - intel_memory_region_unreserve(mem); + intel_memory_region_put(mem); return err; } @@ -252,7 +245,7 @@ static int igt_mock_contiguous(void *arg) total = resource_size(&mem->region); /* Min size */ - obj = igt_object_create(mem, &objects, mem->chunk_size, + obj = igt_object_create(mem, &objects, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -333,17 +326,15 @@ static int igt_mock_contiguous(void *arg) min = target; target = total >> 1; - if (!mem->is_range_manager) { - /* Make sure we can still allocate all the fragmented space */ - obj = igt_object_create(mem, &objects, target, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_close_objects; - } - - igt_object_release(obj); + /* 
Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; } + igt_object_release(obj); + /* * Even though we have enough free space, we don't have a big enough * contiguous block. Make sure that holds true. @@ -362,7 +353,7 @@ static int igt_mock_contiguous(void *arg) } target >>= 1; - } while (target >= mem->chunk_size); + } while (target >= PAGE_SIZE); err_close_objects: list_splice_tail(&holes, &objects); @@ -374,7 +365,9 @@ static int igt_mock_splintered_region(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; + struct i915_buddy_mm *mm; unsigned int expected_order; LIST_HEAD(objects); u64 size; @@ -382,7 +375,7 @@ static int igt_mock_splintered_region(void *arg) /* * Sanity check we can still allocate everything even if the - * max_order != mm.size. i.e our starting address space size is not a + * mm.max_order != mm.size. i.e our starting address space size is not a * power-of-two. */ @@ -391,20 +384,29 @@ static int igt_mock_splintered_region(void *arg) if (IS_ERR(mem)) return PTR_ERR(mem); - expected_order = get_order(rounddown_pow_of_two(size)); - if (mem->max_order != expected_order) { - pr_err("%s order mismatch(%u != %u)\n", - __func__, mem->max_order, expected_order); - err = -EINVAL; - goto out_put; - } - obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_close; } + res = to_ttm_buddy_resource(obj->mm.res); + mm = res->mm; + if (mm->size != size) { + pr_err("%s size mismatch(%llu != %llu)\n", + __func__, mm->size, size); + err = -EINVAL; + goto out_put; + } + + expected_order = get_order(rounddown_pow_of_two(size)); + if (mm->max_order != expected_order) { + pr_err("%s order mismatch(%u != %u)\n", + __func__, mm->max_order, expected_order); + err = -EINVAL; + goto out_put; + } + close_objects(mem, &objects); /* @@ -415,15 +417,12 @@ static int igt_mock_splintered_region(void *arg) * sure that does indeed hold true. */ - if (!mem->is_range_manager) { - obj = igt_object_create(mem, &objects, size, - I915_BO_ALLOC_CONTIGUOUS); - if (!IS_ERR(obj)) { - pr_err("%s too large contiguous allocation was not rejected\n", - __func__); - err = -EINVAL; - goto out_close; - } + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + if (!IS_ERR(obj)) { + pr_err("%s too large contiguous allocation was not rejected\n", + __func__); + err = -EINVAL; + goto out_close; } obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), @@ -442,6 +441,74 @@ out_put: return err; } +#ifndef SZ_8G +#define SZ_8G BIT_ULL(33) +#endif + +static int igt_mock_max_segment(void *arg) +{ + const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; + struct drm_i915_gem_object *obj; + struct i915_buddy_block *block; + struct i915_buddy_mm *mm; + struct list_head *blocks; + struct scatterlist *sg; + LIST_HEAD(objects); + u64 size; + int err = 0; + + /* + * While we may create very large contiguous blocks, we may need + * to break those down for consumption elsewhere. In particular, + * dma-mapping with scatterlist elements have an implicit limit of + * UINT_MAX on each element. 
+ */ + + size = SZ_8G; + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_put; + } + + res = to_ttm_buddy_resource(obj->mm.res); + blocks = &res->blocks; + mm = res->mm; + size = 0; + list_for_each_entry(block, blocks, link) { + if (i915_buddy_block_size(mm, block) > size) + size = i915_buddy_block_size(mm, block); + } + if (size < max_segment) { + pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", + __func__, max_segment, size); + err = -EINVAL; + goto out_close; + } + + for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + if (sg->length > max_segment) { + pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", + __func__, sg->length, max_segment); + err = -EINVAL; + goto out_close; + } + } + +out_close: + close_objects(mem, &objects); +out_put: + intel_memory_region_put(mem); + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -1046,6 +1113,7 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), + SUBTEST(igt_mock_max_segment), }; struct intel_memory_region *mem; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 6120d43fe504..3b3264311c91 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -3,6 +3,7 @@ * Copyright © 2019-2021 Intel Corporation */ +#include #include #include @@ -25,10 +26,11 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) { unsigned int flags; struct sg_table *pages; + int err; flags = I915_ALLOC_MIN_PAGE_SIZE; if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; + flags |= TTM_PL_FLAG_CONTIGUOUS; obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, obj->base.size, @@ -38,13 +40,17 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); if (IS_ERR(pages)) { - intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); - return PTR_ERR(pages); + err = PTR_ERR(pages); + goto err_free_resource; } __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); return 0; + +err_free_resource: + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); + return err; } static const struct drm_i915_gem_object_ops mock_region_obj_ops = { -- cgit v1.2.3 From 13c2ceb6addb6b14468e09b75832c98909eed8e7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Jun 2021 16:25:01 +0100 Subject: drm/i915/ttm: restore min_page_size behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have bo->page_alignment which perfectly describes what we need if we have min page size restrictions for lmem. We can also drop the flag here, since this is the default behaviour for all objects. 
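As a concrete example (assuming 4 KiB system pages), a region with a 64 KiB minimum page size ends up passing mem->min_page_size >> PAGE_SHIFT = SZ_64K >> 12 = 16 pages as bo->page_alignment, so buddy allocations for such objects keep honouring the 64 KiB minimum.
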
v2(Thomas): - bo->page_alignment is in page units Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210616152501.394518-7-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++-- drivers/gpu/drm/i915/intel_memory_region.h | 3 +-- drivers/gpu/drm/i915/intel_region_ttm.c | 2 +- drivers/gpu/drm/i915/selftests/mock_region.c | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index b8739f3d3462..9366b18d1bc6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -672,9 +672,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size, - bo_type, &i915_sys_placement, 1, + bo_type, &i915_sys_placement, + mem->min_page_size >> PAGE_SHIFT, true, NULL, NULL, i915_ttm_bo_destroy); - if (!ret) obj->ttm.created = true; diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index b04fb22726d9..2be8433d373a 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -40,8 +40,7 @@ enum intel_region_id { #define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) #define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) -#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) -#define I915_ALLOC_CONTIGUOUS BIT(1) +#define I915_ALLOC_CONTIGUOUS BIT(0) #define for_each_memory_region(mr, i915, id) \ for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 052253c81e98..d53d78dec2be 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -167,7 +167,7 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, int ret; mock_bo.base.size = size; - mock_bo.page_alignment = 1; + mock_bo.page_alignment = mem->min_page_size >> PAGE_SHIFT; place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 3b3264311c91..fa786dede608 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -28,7 +28,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) struct sg_table *pages; int err; - flags = I915_ALLOC_MIN_PAGE_SIZE; + flags = 0; if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) flags |= TTM_PL_FLAG_CONTIGUOUS; -- cgit v1.2.3 From dc2408d86e5ae88ec981e1315f95b7d4d15169fe Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 16 Jun 2021 11:03:50 +0200 Subject: drm/i915/gem: Remove duplicated call to ops->pread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Between commit ae30af84edb5b7cc95485922e43afd909a892e1b Author: Maarten Lankhorst Date: Tue Mar 23 16:50:00 2021 +0100 drm/i915: Disable userptr pread/pwrite support. and commit 0049b688459b846f819b6e51c24cd0781fcfde41 Author: Matthew Auld Date: Thu Nov 5 15:49:33 2020 +0000 drm/i915/gem: Allow backends to override pread implementation this accidentally landed twice. 
Reviewed-by: Matthew Auld Cc: Matthew Auld Cc: Jason Ekstrand Cc: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210616090350.828696-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_gem.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6a0a3f0e36e1..07aa80773a02 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -469,12 +469,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, if (ret != -ENODEV) goto out; - ret = -ENODEV; - if (obj->ops->pread) - ret = obj->ops->pread(obj, args); - if (ret != -ENODEV) - goto out; - ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -- cgit v1.2.3 From a6c5b891252143ee9bc02f649282905954d6705c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 17 Jun 2021 09:37:19 +0100 Subject: drm/i915/ttm: remove unused function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_region_ttm_node_free is no longer used. Also fixup the related kerneldoc. Reported-by: kernel test robot Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210617083719.497619-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/intel_region_ttm.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index d53d78dec2be..4cd10f364e84 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -141,7 +141,7 @@ struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem #ifdef CONFIG_DRM_I915_SELFTEST /** - * intel_region_ttm_node_alloc - Allocate memory resources from a region + * intel_region_ttm_resource_alloc - Allocate memory resources from a region * @mem: The memory region, * @size: The requested size in bytes * @flags: Allocation flags @@ -150,8 +150,8 @@ struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem * memory from standalone TTM range managers, without the TTM eviction * functionality. Don't use if you are not completely sure that's the * case. The returned opaque node can be converted to an sg_table using - * intel_region_ttm_node_to_st(), and can be freed using - * intel_region_ttm_node_free(). + * intel_region_ttm_resource_to_st(), and can be freed using + * intel_region_ttm_resource_free(). * * Return: A valid pointer on success, an error pointer on failure. */ @@ -178,14 +178,6 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, #endif -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *res) -{ - struct ttm_resource_manager *man = mem->region_private; - - man->func->free(man, res); -} - /** * intel_region_ttm_resource_free - Free a resource allocated from a resource manager * @mem: The region the resource was allocated from. 
-- cgit v1.2.3 From 1c4dbe056dab0b7c2a2f42f4d393cc7b9bdb98ad Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 17 Jun 2021 08:30:07 +0200 Subject: drm/i915: Reference objects on the ww object list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the ww transaction endpoint easily end up far out-of-scope of the objects on the ww object list, particularly for contending lock objects, make sure we reference objects on the list so they don't disappear under us. This comes with a performance penalty so it's been debated whether this is really needed. But I think this is motivated by the fact that locking is typically difficult to get right, and whatever we can do to make it simpler for developers moving forward should be done, unless the performance impact is far too high. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 ++++++-- drivers/gpu/drm/i915/i915_gem.c | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index d66aa00d023a..241666931945 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -169,13 +169,17 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, else ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL); - if (!ret && ww) + if (!ret && ww) { + i915_gem_object_get(obj); list_add_tail(&obj->obj_link, &ww->obj_list); + } if (ret == -EALREADY) ret = 0; - if (ret == -EDEADLK) + if (ret == -EDEADLK) { + i915_gem_object_get(obj); ww->contended = obj; + } return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 07aa80773a02..0a815e15ffc7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1216,6 +1216,7 @@ static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { list_del(&obj->obj_link); i915_gem_object_unlock(obj); + i915_gem_object_put(obj); } } @@ -1223,6 +1224,7 @@ void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) { list_del(&obj->obj_link); i915_gem_object_unlock(obj); + i915_gem_object_put(obj); } void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) @@ -1247,6 +1249,8 @@ int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) if (!ret) list_add_tail(&ww->contended->obj_link, &ww->obj_list); + else + i915_gem_object_put(ww->contended); ww->contended = NULL; -- cgit v1.2.3 From 5c43ec5d538a5fa1736d298e63a3f8ac03009eea Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 17 Jun 2021 08:30:08 +0200 Subject: drm/i915: Break out dma_resv ww locking utilities to separate files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we're about to add more ww-related functionality, break out the dma_resv ww locking utilities to their own files Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_object.h | 1 + drivers/gpu/drm/i915/gt/intel_renderstate.h | 1 + 
drivers/gpu/drm/i915/i915_gem.c | 56 ------------------------- drivers/gpu/drm/i915/i915_gem.h | 12 ------ drivers/gpu/drm/i915/i915_gem_ww.c | 63 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_ww.h | 21 ++++++++++ 7 files changed, 87 insertions(+), 68 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_ww.c create mode 100644 drivers/gpu/drm/i915/i915_gem_ww.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6c84e96ab26a..dde698f3bff4 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -166,6 +166,7 @@ i915-y += \ i915_cmd_parser.o \ i915_gem_evict.o \ i915_gem_gtt.o \ + i915_gem_ww.o \ i915_gem.o \ i915_globals.o \ i915_query.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 241666931945..7bf4dd46d8d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -14,6 +14,7 @@ #include "display/intel_frontbuffer.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" +#include "i915_gem_ww.h" #include "i915_vma_types.h" /* diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 48f009203917..4da4c5234ef0 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -8,6 +8,7 @@ #include #include "i915_gem.h" +#include "i915_gem_ww.h" struct i915_request; struct intel_context; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a815e15ffc7..590efc8b0265 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1201,62 +1201,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) return ret; } -void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) -{ - ww_acquire_init(&ww->ctx, &reservation_ww_class); - INIT_LIST_HEAD(&ww->obj_list); - ww->intr = intr; - ww->contended = NULL; -} - -static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) -{ - struct drm_i915_gem_object *obj; - - while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { - list_del(&obj->obj_link); - i915_gem_object_unlock(obj); - i915_gem_object_put(obj); - } -} - -void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) -{ - list_del(&obj->obj_link); - i915_gem_object_unlock(obj); - i915_gem_object_put(obj); -} - -void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) -{ - i915_gem_ww_ctx_unlock_all(ww); - WARN_ON(ww->contended); - ww_acquire_fini(&ww->ctx); -} - -int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) -{ - int ret = 0; - - if (WARN_ON(!ww->contended)) - return -EINVAL; - - i915_gem_ww_ctx_unlock_all(ww); - if (ww->intr) - ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); - else - dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); - - if (!ret) - list_add_tail(&ww->contended->obj_link, &ww->obj_list); - else - i915_gem_object_put(ww->contended); - - ww->contended = NULL; - - return ret; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gem_device.c" #include "selftests/i915_gem.c" diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 440c35f1abc9..d0752e5553db 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -123,16 +123,4 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return 
test_bit(TASKLET_STATE_SCHED, &t->state); } -struct i915_gem_ww_ctx { - struct ww_acquire_ctx ctx; - struct list_head obj_list; - bool intr; - struct drm_i915_gem_object *contended; -}; - -void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); -void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); -int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); -void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); - #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c b/drivers/gpu/drm/i915/i915_gem_ww.c new file mode 100644 index 000000000000..3f6ff139478e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_ww.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ +#include +#include "i915_gem_ww.h" +#include "gem/i915_gem_object.h" + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + ww_acquire_init(&ww->ctx, &reservation_ww_class); + INIT_LIST_HEAD(&ww->obj_list); + ww->intr = intr; + ww->contended = NULL; +} + +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) +{ + struct drm_i915_gem_object *obj; + + while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + } +} + +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) +{ + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); +} + +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) +{ + i915_gem_ww_ctx_unlock_all(ww); + WARN_ON(ww->contended); + ww_acquire_fini(&ww->ctx); +} + +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) +{ + int ret = 0; + + if (WARN_ON(!ww->contended)) + return -EINVAL; + + i915_gem_ww_ctx_unlock_all(ww); + if (ww->intr) + ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); + else + dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); + + if (!ret) + list_add_tail(&ww->contended->obj_link, &ww->obj_list); + else + i915_gem_object_put(ww->contended); + + ww->contended = NULL; + + return ret; +} diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h new file mode 100644 index 000000000000..f2d8769e4118 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ +#ifndef __I915_GEM_WW_H__ +#define __I915_GEM_WW_H__ + +#include + +struct i915_gem_ww_ctx { + struct ww_acquire_ctx ctx; + struct list_head obj_list; + struct drm_i915_gem_object *contended; + bool intr; +}; + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); +#endif -- cgit v1.2.3 From 3b86eb82dc2c9989ca6e53cb597a2a1390c64d12 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 17 Jun 2021 08:30:09 +0200 Subject: drm/i915: Introduce a ww transaction helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a for_i915_gem_ww(){} utility to help make the code around a ww transaction more readable. 
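For illustration, a minimal usage sketch (the object and the work done under the lock are placeholders, not part of this patch): the body is retried after a successful backoff on -EDEADLK, the ww context is finalized once the loop exits, and err then holds the final status.

	struct i915_gem_ww_ctx ww;
	int err;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;	/* -EDEADLK backs off and retries the body */

		err = do_locked_work(obj);	/* placeholder for the real work */
	}
	return err;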
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_ww.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h index f2d8769e4118..f6b1a796667b 100644 --- a/drivers/gpu/drm/i915/i915_gem_ww.h +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -11,11 +11,40 @@ struct i915_gem_ww_ctx { struct ww_acquire_ctx ctx; struct list_head obj_list; struct drm_i915_gem_object *contended; - bool intr; + unsigned short intr; + unsigned short loop; }; void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); + +/* Internal functions used by the inlines! Don't use. */ +static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err) +{ + ww->loop = 0; + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(ww); + if (!err) + ww->loop = 1; + } + + if (!ww->loop) + i915_gem_ww_ctx_fini(ww); + + return err; +} + +static inline void +__i915_gem_ww_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + i915_gem_ww_ctx_init(ww, intr); + ww->loop = 1; +} + +#define for_i915_gem_ww(_ww, _err, _intr) \ + for (__i915_gem_ww_init(_ww, _intr); (_ww)->loop; \ + _err = __i915_gem_ww_fini(_ww, _err)) + #endif -- cgit v1.2.3 From 0dcd6fdf3b4be20995d8d35198ac252833613ab9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jun 2021 08:30:10 +0200 Subject: drm/i915/gt: Add an insert_entry for gen8_ppgtt In the next patch, we will want to write a PTE for an explicit dma address, outside of the usual vma. 
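For illustration only (dma_addr, offset and is_lmem are placeholders): with the hook wired up as vm->insert_page, a caller holding a dma address can point a single page of the VM at it directly,

	vm->insert_page(vm, dma_addr, offset, I915_CACHE_NONE,
			is_lmem ? PTE_LM : 0);

which is the same call the migration code later in this series makes from its insert_pte() helper.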
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-5-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 21c8b7350b7a..1b676d7700bf 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -555,6 +555,24 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, } } +static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + u64 idx = offset >> GEN8_PTE_SHIFT; + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + struct i915_page_directory *pd = + i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + gen8_pte_t *vaddr; + + vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); +} + static int gen8_init_scratch(struct i915_address_space *vm) { u32 pte_flags; @@ -734,6 +752,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; + ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; -- cgit v1.2.3 From 3607e1e9ba7553e39b175fa14d10a48677083607 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jun 2021 08:30:11 +0200 Subject: drm/i915/gt: Add a routine to iterate over the pagetables of a GTT In the next patch, we will want to look at the dma addresses of individual page tables, so add a routine to iterate over them. 
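As a hedged sketch of the intended use (note_pt_dma() and the addr variable are illustrative, not part of the patch): a caller can walk the page tables backing a range and read each table's dma address via px_dma() from the callback.

	static void note_pt_dma(struct i915_address_space *vm,
				struct i915_page_table *pt,
				void *data)
	{
		/* record the dma address of this page table */
		*(dma_addr_t *)data = px_dma(pt);
	}

	...
	vm->foreach(vm, start, length, note_pt_dma, &addr);

The migration code added later in this series uses this hook in exactly this way, with insert_pte() as the callback.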
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-6-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 49 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gtt.h | 7 ++++++ 2 files changed, 56 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 1b676d7700bf..3d02c726c746 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -361,6 +361,54 @@ static void gen8_ppgtt_alloc(struct i915_address_space *vm, &start, start + length, vm->top); } +static void __gen8_ppgtt_foreach(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 *start, u64 end, int lvl, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data) +{ + unsigned int idx, len; + + len = gen8_pd_range(*start, end, lvl--, &idx); + + spin_lock(&pd->lock); + do { + struct i915_page_table *pt = pd->entry[idx]; + + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + if (lvl) { + __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl, + fn, data); + } else { + fn(vm, pt, data); + *start += gen8_pt_count(*start, end); + } + + spin_lock(&pd->lock); + atomic_dec(&pt->used); + } while (idx++, --len); + spin_unlock(&pd->lock); +} + +static void gen8_ppgtt_foreach(struct i915_address_space *vm, + u64 start, u64 length, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data) +{ + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + + __gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top, + fn, data); +} + static __always_inline u64 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, @@ -755,6 +803,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; + ppgtt->vm.foreach = gen8_ppgtt_foreach; ppgtt->vm.pte_encode = gen8_pte_encode; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index edea95b97c36..9bd89f2a01ff 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -296,6 +296,13 @@ struct i915_address_space { u32 flags); void (*cleanup)(struct i915_address_space *vm); + void (*foreach)(struct i915_address_space *vm, + u64 start, u64 length, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data); + struct i915_vma_ops vma_ops; I915_SELFTEST_DECLARE(struct fault_attr fault_attr); -- cgit v1.2.3 From b4ef95309110122e05b0d17310b80a0abde881d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jun 2021 08:30:12 +0200 Subject: drm/i915/gt: Export the pinned context constructor and destructor Allow internal clients to create and destroy a pinned context. 
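A hedged usage sketch, mirroring the kernel_context constructor below (the engine pointer, the "example_context" name and the error handling are illustrative):

	static struct lock_class_key key;
	struct intel_context *ce;

	ce = intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
						I915_GEM_HWS_SEQNO_ADDR,
						&key, "example_context");
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/* ... the context is perma-pinned and ready for requests ... */

	intel_engine_destroy_pinned_context(ce);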
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-7-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 11 +++++++++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 27 +++++++++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 8d9184920c51..36ea9eb52bb5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,7 +19,9 @@ #include "intel_workarounds.h" struct drm_printer; +struct intel_context; struct intel_gt; +struct lock_class_key; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -256,6 +258,15 @@ struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine); u32 intel_engine_context_size(struct intel_gt *gt, u8 class); +struct intel_context * +intel_engine_create_pinned_context(struct intel_engine_cs *engine, + struct i915_address_space *vm, + unsigned int ring_size, + unsigned int hwsp, + struct lock_class_key *key, + const char *name); + +void intel_engine_destroy_pinned_context(struct intel_context *ce); void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 9ceddfbb1687..fcbaad18ac91 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -810,11 +810,13 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) #endif } -static struct intel_context * -create_pinned_context(struct intel_engine_cs *engine, - unsigned int hwsp, - struct lock_class_key *key, - const char *name) +struct intel_context * +intel_engine_create_pinned_context(struct intel_engine_cs *engine, + struct i915_address_space *vm, + unsigned int ring_size, + unsigned int hwsp, + struct lock_class_key *key, + const char *name) { struct intel_context *ce; int err; @@ -825,6 +827,10 @@ create_pinned_context(struct intel_engine_cs *engine, __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); ce->timeline = page_pack_bits(NULL, hwsp); + ce->ring = __intel_context_ring_size(ring_size); + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(vm); err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { @@ -843,7 +849,7 @@ create_pinned_context(struct intel_engine_cs *engine, return ce; } -static void destroy_pinned_context(struct intel_context *ce) +void intel_engine_destroy_pinned_context(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; struct i915_vma *hwsp = engine->status_page.vma; @@ -863,8 +869,9 @@ create_kernel_context(struct intel_engine_cs *engine) { static struct lock_class_key kernel; - return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR, - &kernel, "kernel_context"); + return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K, + I915_GEM_HWS_SEQNO_ADDR, + &kernel, "kernel_context"); } /** @@ -907,7 +914,7 @@ static int engine_init_common(struct intel_engine_cs *engine) return 0; err_context: - destroy_pinned_context(ce); + intel_engine_destroy_pinned_context(ce); return ret; } @@ -969,7 +976,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) fput(engine->default_state); if 
(engine->kernel_context) - destroy_pinned_context(engine->kernel_context); + intel_engine_destroy_pinned_context(engine->kernel_context); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); cleanup_status_page(engine); -- cgit v1.2.3 From cf586021642d8017cde111b7dd1ba86224e9da51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jun 2021 08:30:13 +0200 Subject: drm/i915/gt: Pipelined page migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we pipeline the PTE updates and then do the copy of those pages within a single unpreemptible command packet, we can submit the copies and leave them to be scheduled without having to synchronously wait under a global lock. In order to manage migration, we need to preallocate the page tables (and keep them pinned and available for use at any time), causing a bottleneck for migrations as all clients must contend on the limited resources. By inlining the ppGTT updates and performing the blit atomically, each client only owns the PTE while in use, and so we can reschedule individual operations however we see fit. And most importantly, we do not need to take a global lock on the shared vm, and wait until the operation is complete before releasing the lock for others to claim the PTE for themselves. Signed-off-by: Chris Wilson Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-8-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_engine.h | 1 + drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 2 + drivers/gpu/drm/i915/gt/intel_migrate.c | 542 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_migrate.h | 45 ++ drivers/gpu/drm/i915/gt/intel_migrate_types.h | 15 + drivers/gpu/drm/i915/gt/intel_ring.h | 1 + drivers/gpu/drm/i915/gt/selftest_migrate.c | 291 +++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 9 files changed, 899 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/intel_migrate.c create mode 100644 drivers/gpu/drm/i915/gt/intel_migrate.h create mode 100644 drivers/gpu/drm/i915/gt/intel_migrate_types.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_migrate.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index dde698f3bff4..5e10e0628c56 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -108,6 +108,7 @@ gt-y += \ gt/intel_gtt.o \ gt/intel_llc.o \ gt/intel_lrc.o \ + gt/intel_migrate.o \ gt/intel_mocs.o \ gt/intel_ppgtt.o \ gt/intel_rc6.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 36ea9eb52bb5..62f7440bc111 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -188,6 +188,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 #define I915_HWS_CSB_BUF0_INDEX 0x10 diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2694dbb9967e..1c3af0fc0456 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -123,8 +123,10 
@@ #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) #define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) #define MI_SEMAPHORE_TOKEN_SHIFT 5 +#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) +#define MI_STORE_QWORD_IMM_GEN8 (MI_INSTR(0x20, 3) | REG_BIT(21)) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c new file mode 100644 index 000000000000..e2e860063e7b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_context.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gtt.h" +#include "intel_migrate.h" +#include "intel_ring.h" + +struct insert_pte_data { + u64 offset; + bool is_lmem; +}; + +#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ + +static bool engine_supports_migration(struct intel_engine_cs *engine) +{ + if (!engine) + return false; + + /* + * We need the ability to prevent aribtration (MI_ARB_ON_OFF), + * the ability to write PTE using inline data (MI_STORE_DATA) + * and of course the ability to do the block transfer (blits). + */ + GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS); + + return true; +} + +static void insert_pte(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, + d->is_lmem ? PTE_LM : 0); + d->offset += PAGE_SIZE; +} + +static struct i915_address_space *migrate_vm(struct intel_gt *gt) +{ + struct i915_vm_pt_stash stash = {}; + struct i915_ppgtt *vm; + int err; + int i; + + /* + * We construct a very special VM for use by all migration contexts, + * it is kept pinned so that it can be used at any time. As we need + * to pre-allocate the page directories for the migration VM, this + * limits us to only using a small number of prepared vma. + * + * To be able to pipeline and reschedule migration operations while + * avoiding unnecessary contention on the vm itself, the PTE updates + * are inline with the blits. All the blits use the same fixed + * addresses, with the backing store redirection being updated on the + * fly. Only 2 implicit vma are used for all migration operations. + * + * We lay the ppGTT out as: + * + * [0, CHUNK_SZ) -> first object + * [CHUNK_SZ, 2 * CHUNK_SZ) -> second object + * [2 * CHUNK_SZ, 2 * CHUNK_SZ + 2 * CHUNK_SZ >> 9] -> PTE + * + * By exposing the dma addresses of the page directories themselves + * within the ppGTT, we are then able to rewrite the PTE prior to use. + * But the PTE update and subsequent migration operation must be atomic, + * i.e. within the same non-preemptible window so that we do not switch + * to another migration context that overwrites the PTE. 
+ * + * TODO: Add support for huge LMEM PTEs + */ + + vm = i915_ppgtt_create(gt); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + if (!vm->vm.allocate_va_range || !vm->vm.foreach) { + err = -ENODEV; + goto err_vm; + } + + /* + * Each engine instance is assigned its own chunk in the VM, so + * that we can run multiple instances concurrently + */ + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + struct intel_engine_cs *engine; + u64 base = (u64)i << 32; + struct insert_pte_data d = {}; + struct i915_gem_ww_ctx ww; + u64 sz; + + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (!engine_supports_migration(engine)) + continue; + + /* + * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need + * 4x2 page directories for source/destination. + */ + sz = 2 * CHUNK_SZ; + d.offset = base + sz; + + /* + * We need another page directory setup so that we can write + * the 8x512 PTE in each chunk. + */ + sz += (sz >> 12) * sizeof(u64); + + err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz); + if (err) + goto err_vm; + + for_i915_gem_ww(&ww, err, true) { + err = i915_vm_lock_objects(&vm->vm, &ww); + if (err) + continue; + err = i915_vm_map_pt_stash(&vm->vm, &stash); + if (err) + continue; + + vm->vm.allocate_va_range(&vm->vm, &stash, base, sz); + } + i915_vm_free_pt_stash(&vm->vm, &stash); + if (err) + goto err_vm; + + /* Now allow the GPU to rewrite the PTE via its own ppGTT */ + d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); + vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); + } + + return &vm->vm; + +err_vm: + i915_vm_put(&vm->vm); + return ERR_PTR(err); +} + +static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + int i; + + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (engine_supports_migration(engine)) + return engine; + } + + return NULL; +} + +static struct intel_context *pinned_context(struct intel_gt *gt) +{ + static struct lock_class_key key; + struct intel_engine_cs *engine; + struct i915_address_space *vm; + struct intel_context *ce; + + engine = first_copy_engine(gt); + if (!engine) + return ERR_PTR(-ENODEV); + + vm = migrate_vm(gt); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + ce = intel_engine_create_pinned_context(engine, vm, SZ_512K, + I915_GEM_HWS_MIGRATE, + &key, "migrate"); + i915_vm_put(ce->vm); + return ce; +} + +int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt) +{ + struct intel_context *ce; + + memset(m, 0, sizeof(*m)); + + ce = pinned_context(gt); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + m->context = ce; + return 0; +} + +static int random_index(unsigned int max) +{ + return upper_32_bits(mul_u32_u32(get_random_u32(), max)); +} + +static struct intel_context *__migrate_engines(struct intel_gt *gt) +{ + struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE]; + struct intel_engine_cs *engine; + unsigned int count, i; + + count = 0; + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (engine_supports_migration(engine)) + engines[count++] = engine; + } + + return intel_context_create(engines[random_index(count)]); +} + +struct intel_context *intel_migrate_create_context(struct intel_migrate *m) +{ + struct intel_context *ce; + + /* + * We randomly distribute contexts across the engines upon constrction, + * as they all share the same pinned vm, and so in order to allow + * multiple blits to run in parallel, 
we must construct each blit + * to use a different range of the vm for its GTT. This has to be + * known at construction, so we can not use the late greedy load + * balancing of the virtual-engine. + */ + ce = __migrate_engines(m->context->engine->gt); + if (IS_ERR(ce)) + return ce; + + ce->ring = __intel_context_ring_size(SZ_256K); + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(m->context->vm); + + return ce; +} + +static inline struct sgt_dma sg_sgt(struct scatterlist *sg) +{ + dma_addr_t addr = sg_dma_address(sg); + + return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; +} + +static int emit_no_arbitration(struct i915_request *rq) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Explicitly disable preemption for this request. */ + *cs++ = MI_ARB_ON_OFF; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int emit_pte(struct i915_request *rq, + struct sgt_dma *it, + enum i915_cache_level cache_level, + bool is_lmem, + u64 offset, + int length) +{ + const u64 encode = rq->context->vm->pte_encode(0, cache_level, + is_lmem ? PTE_LM : 0); + struct intel_ring *ring = rq->ring; + int total = 0; + u32 *hdr, *cs; + int pkt; + + GEM_BUG_ON(INTEL_GEN(rq->engine->i915) < 8); + + /* Compute the page directory offset for the target address range */ + offset += (u64)rq->engine->instance << 32; + offset >>= 12; + offset *= sizeof(u64); + offset += 2 * CHUNK_SZ; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Pack as many PTE updates as possible into a single MI command */ + pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + hdr = cs; + *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + do { + if (cs - hdr >= pkt) { + *hdr += cs - hdr - 2; + *cs++ = MI_NOOP; + + ring->emit = (void *)cs - ring->vaddr; + intel_ring_advance(rq, cs); + intel_ring_update_space(ring); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + hdr = cs; + *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + + *cs++ = lower_32_bits(encode | it->dma); + *cs++ = upper_32_bits(encode | it->dma); + + offset += 8; + total += I915_GTT_PAGE_SIZE; + + it->dma += I915_GTT_PAGE_SIZE; + if (it->dma >= it->max) { + it->sg = __sg_next(it->sg); + if (!it->sg || sg_dma_len(it->sg) == 0) + break; + + it->dma = sg_dma_address(it->sg); + it->max = it->dma + sg_dma_len(it->sg); + } + } while (total < length); + + *hdr += cs - hdr - 2; + *cs++ = MI_NOOP; + + ring->emit = (void *)cs - ring->vaddr; + intel_ring_advance(rq, cs); + intel_ring_update_space(ring); + + return total; +} + +static bool wa_1209644611_applies(int gen, u32 size) +{ + u32 height = size >> PAGE_SHIFT; + + if (gen != 11) + return false; + + return height % 4 == 3 && height <= 8; +} + +static int emit_copy(struct i915_request *rq, int size) +{ + const int gen = INTEL_GEN(rq->engine->i915); + u32 instance = rq->engine->instance; + u32 *cs; + + cs = intel_ring_begin(rq, gen >= 8 ? 
10 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (gen >= 9 && !wa_1209644611_applies(gen, size)) { + *cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + *cs++ = BLT_DEPTH_32 | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = instance; + *cs++ = 0; + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + *cs++ = instance; + } else if (gen >= 8) { + *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = instance; + *cs++ = 0; + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + *cs++ = instance; + } else { + GEM_BUG_ON(instance); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + } + + intel_ring_advance(rq, cs); + return 0; +} + +int +intel_context_migrate_copy(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out) +{ + struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst); + struct i915_request *rq; + int err; + + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (await) { + err = i915_request_await_dma_fence(rq, await); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + await = NULL; + } + + /* The PTE updates + copy must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0, + CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, + CHUNK_SZ, len); + if (err < 0) + goto out_rq; + if (err < len) { + err = -EINVAL; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_copy(rq, len); + + /* Arbitration is re-enabled between requests. 
*/ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it_src.sg || !sg_dma_len(it_src.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +int intel_migrate_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_migrate_copy(ce, await, + src, src_cache_level, src_is_lmem, + dst, dst_cache_level, dst_is_lmem, + out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + +void intel_migrate_fini(struct intel_migrate *m) +{ + struct intel_context *ce; + + ce = fetch_and_zero(&m->context); + if (!ce) + return; + + intel_engine_destroy_pinned_context(ce); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_migrate.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h new file mode 100644 index 000000000000..32c61190ed73 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_MIGRATE__ +#define __INTEL_MIGRATE__ + +#include "intel_migrate_types.h" + +struct dma_fence; +struct i915_request; +struct i915_gem_ww_ctx; +struct intel_gt; +struct scatterlist; +enum i915_cache_level; + +int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt); + +struct intel_context *intel_migrate_create_context(struct intel_migrate *m); + +int intel_migrate_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out); + +int intel_context_migrate_copy(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out); + +void intel_migrate_fini(struct intel_migrate *m); + +#endif /* __INTEL_MIGRATE__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate_types.h b/drivers/gpu/drm/i915/gt/intel_migrate_types.h new file mode 100644 index 000000000000..d98230597f42 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_MIGRATE_TYPES__ +#define __INTEL_MIGRATE_TYPES__ + +struct intel_context; + +struct intel_migrate { + struct intel_context *context; +}; + +#endif /* __INTEL_MIGRATE_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index dbf5f14a136f..1b32dadfb8c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -49,6 +49,7 @@ static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) * intel_ring_begin()). 
*/ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); + GEM_BUG_ON(!IS_ALIGNED(rq->ring->emit, 8)); /* RING_TAIL qword align */ } static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c new file mode 100644 index 000000000000..9784d149ebf1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "selftests/i915_random.h" + +static const unsigned int sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + CHUNK_SZ - SZ_4K, + CHUNK_SZ, + CHUNK_SZ + SZ_4K, + SZ_64M, +}; + +static struct drm_i915_gem_object * +create_lmem_or_internal(struct drm_i915_private *i915, size_t size) +{ + if (HAS_LMEM(i915)) { + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_lmem(i915, size, 0); + if (!IS_ERR(obj)) + return obj; + } + + return i915_gem_object_create_internal(i915, size); +} + +static int copy(struct intel_migrate *migrate, + int (*fn)(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out), + u32 sz, struct rnd_state *prng) +{ + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *src, *dst; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + u32 *vaddr; + int err = 0; + int i; + + src = create_lmem_or_internal(i915, sz); + if (IS_ERR(src)) + return 0; + + dst = i915_gem_object_create_internal(i915, sz); + if (IS_ERR(dst)) + goto err_free_src; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(src, &ww); + if (err) + continue; + + err = i915_gem_object_lock(dst, &ww); + if (err) + continue; + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + continue; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = i; + i915_gem_object_flush_map(src); + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto unpin_src; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(dst); + + err = fn(migrate, &ww, src, dst, &rq); + if (!err) + continue; + + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + } + i915_gem_object_unpin_map(dst); +unpin_src: + i915_gem_object_unpin_map(src); + } + if (err) + goto err_out; + + if (rq) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } + i915_request_put(rq); + } + + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != x) { + pr_err("%ps failed, size: %u, offset: %zu\n", + fn, sz, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(dst); + i915_gem_object_unpin_map(src); + +err_out: + i915_gem_object_put(dst); +err_free_src: + i915_gem_object_put(src); + + return err; +} + +static int __migrate_copy(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out) +{ + return intel_migrate_copy(migrate, ww, NULL, + src->mm.pages->sgl, src->cache_level, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, dst->cache_level, + 
i915_gem_object_is_lmem(dst), + out); +} + +static int __global_copy(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out) +{ + return intel_context_migrate_copy(migrate->context, NULL, + src->mm.pages->sgl, src->cache_level, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, dst->cache_level, + i915_gem_object_is_lmem(dst), + out); +} + +static int +migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return copy(migrate, __migrate_copy, sz, prng); +} + +static int +global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return copy(migrate, __global_copy, sz, prng); +} + +static int live_migrate_copy(void *arg) +{ + struct intel_migrate *migrate = arg; + struct drm_i915_private *i915 = migrate->context->engine->i915; + I915_RND_STATE(prng); + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + int err; + + err = migrate_copy(migrate, sizes[i], &prng); + if (err == 0) + err = global_copy(migrate, sizes[i], &prng); + i915_gem_drain_freed_objects(i915); + if (err) + return err; + } + + return 0; +} + +struct threaded_migrate { + struct intel_migrate *migrate; + struct task_struct *tsk; + struct rnd_state prng; +}; + +static int threaded_migrate(struct intel_migrate *migrate, + int (*fn)(void *arg), + unsigned int flags) +{ + const unsigned int n_cpus = num_online_cpus() + 1; + struct threaded_migrate *thread; + I915_RND_STATE(prng); + unsigned int i; + int err = 0; + + thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL); + if (!thread) + return 0; + + for (i = 0; i < n_cpus; ++i) { + struct task_struct *tsk; + + thread[i].migrate = migrate; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk = kthread_run(fn, &thread[i], "igt-%d", i); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + break; + } + + get_task_struct(tsk); + thread[i].tsk = tsk; + } + + msleep(10); /* start all threads before we kthread_stop() */ + + for (i = 0; i < n_cpus; ++i) { + struct task_struct *tsk = thread[i].tsk; + int status; + + if (IS_ERR_OR_NULL(tsk)) + continue; + + status = kthread_stop(tsk); + if (status && !err) + err = status; + + put_task_struct(tsk); + } + + kfree(thread); + return err; +} + +static int __thread_migrate_copy(void *arg) +{ + struct threaded_migrate *tm = arg; + + return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_migrate_copy(void *arg) +{ + return threaded_migrate(arg, __thread_migrate_copy, 0); +} + +static int __thread_global_copy(void *arg) +{ + struct threaded_migrate *tm = arg; + + return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_global_copy(void *arg) +{ + return threaded_migrate(arg, __thread_global_copy, 0); +} + +int intel_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_migrate_copy), + SUBTEST(thread_migrate_copy), + SUBTEST(thread_global_copy), + }; + struct intel_migrate m; + int err; + + if (intel_migrate_init(&m, &i915->gt)) + return 0; + + err = i915_subtests(tests, &m); + intel_migrate_fini(&m); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a92c0e9b7e6b..be5e0191eaea 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests) 
selftest(gt_pm, intel_gt_pm_live_selftests) selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(migrate, intel_migrate_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(mman, i915_gem_mman_live_selftests) -- cgit v1.2.3 From 563baae1875cbcac332086cca325cf55a0532b9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jun 2021 08:30:14 +0200 Subject: drm/i915/gt: Pipelined clear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the PTE and emit a clear within a single unpreemptible packet such that we can schedule and pipeline clears. Signed-off-by: Chris Wilson Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-9-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 143 +++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_migrate.h | 20 ++++ drivers/gpu/drm/i915/gt/selftest_migrate.c | 163 +++++++++++++++++++++++++++++ 3 files changed, 326 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index e2e860063e7b..ba4009120b33 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -488,6 +488,114 @@ out_ce: return err; } +static int emit_clear(struct i915_request *rq, int size, u32 value) +{ + const int gen = INTEL_GEN(rq->engine->i915); + u32 instance = rq->engine->instance; + u32 *cs; + + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + cs = intel_ring_begin(rq, gen >= 8 ? 8 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (gen >= 8) { + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = 0; /* offset */ + *cs++ = instance; + *cs++ = value; + *cs++ = MI_NOOP; + } else { + GEM_BUG_ON(instance); + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = 0; + *cs++ = value; + } + + intel_ring_advance(rq, cs); + return 0; +} + +int +intel_context_migrate_clear(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out) +{ + struct sgt_dma it = sg_sgt(sg); + struct i915_request *rq; + int err; + + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (await) { + err = i915_request_await_dma_fence(rq, await); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + await = NULL; + } + + /* The PTE updates + clear must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_clear(rq, len, value); + + /* Arbitration is re-enabled between requests. 
*/ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it.sg || !sg_dma_len(it.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, struct dma_fence *await, @@ -526,6 +634,41 @@ out: return err; } +int +intel_migrate_clear(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_migrate_clear(ce, await, sg, cache_level, + is_lmem, value, out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + void intel_migrate_fini(struct intel_migrate *m) { struct intel_context *ce; diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h index 32c61190ed73..4e18e755a00b 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.h +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -6,6 +6,8 @@ #ifndef __INTEL_MIGRATE__ #define __INTEL_MIGRATE__ +#include + #include "intel_migrate_types.h" struct dma_fence; @@ -40,6 +42,24 @@ int intel_context_migrate_copy(struct intel_context *ce, bool dst_is_lmem, struct i915_request **out); +int +intel_migrate_clear(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out); +int +intel_context_migrate_clear(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out); + void intel_migrate_fini(struct intel_migrate *m); #endif /* __INTEL_MIGRATE__ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 9784d149ebf1..159c8656e1b0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -129,6 +129,82 @@ err_free_src: return err; } +static int clear(struct intel_migrate *migrate, + int (*fn)(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out), + u32 sz, struct rnd_state *prng) +{ + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + u32 *vaddr; + int err = 0; + int i; + + obj = create_lmem_or_internal(i915, sz); + if (IS_ERR(obj)) + return 0; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + continue; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(obj); + + err = fn(migrate, &ww, obj, sz, &rq); + if (!err) + continue; + + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + } + i915_gem_object_unpin_map(obj); + } + if (err) + goto err_out; + + if 
(rq) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } + i915_request_put(rq); + } + + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != sz) { + pr_err("%ps failed, size: %u, offset: %zu\n", + fn, sz, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(obj); +err_out: + i915_gem_object_put(obj); + + return err; +} + static int __migrate_copy(struct intel_migrate *migrate, struct i915_gem_ww_ctx *ww, struct drm_i915_gem_object *src, @@ -169,6 +245,44 @@ global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) return copy(migrate, __global_copy, sz, prng); } +static int __migrate_clear(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out) +{ + return intel_migrate_clear(migrate, ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + i915_gem_object_is_lmem(obj), + value, out); +} + +static int __global_clear(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out) +{ + return intel_context_migrate_clear(migrate->context, NULL, + obj->mm.pages->sgl, + obj->cache_level, + i915_gem_object_is_lmem(obj), + value, out); +} + +static int +migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return clear(migrate, __migrate_clear, sz, prng); +} + +static int +global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return clear(migrate, __global_clear, sz, prng); +} + static int live_migrate_copy(void *arg) { struct intel_migrate *migrate = arg; @@ -190,6 +304,28 @@ static int live_migrate_copy(void *arg) return 0; } +static int live_migrate_clear(void *arg) +{ + struct intel_migrate *migrate = arg; + struct drm_i915_private *i915 = migrate->context->engine->i915; + I915_RND_STATE(prng); + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + int err; + + err = migrate_clear(migrate, sizes[i], &prng); + if (err == 0) + err = global_clear(migrate, sizes[i], &prng); + + i915_gem_drain_freed_objects(i915); + if (err) + return err; + } + + return 0; +} + struct threaded_migrate { struct intel_migrate *migrate; struct task_struct *tsk; @@ -271,12 +407,39 @@ static int thread_global_copy(void *arg) return threaded_migrate(arg, __thread_global_copy, 0); } +static int __thread_migrate_clear(void *arg) +{ + struct threaded_migrate *tm = arg; + + return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int __thread_global_clear(void *arg) +{ + struct threaded_migrate *tm = arg; + + return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_migrate_clear(void *arg) +{ + return threaded_migrate(arg, __thread_migrate_clear, 0); +} + +static int thread_global_clear(void *arg) +{ + return threaded_migrate(arg, __thread_global_clear, 0); +} + int intel_migrate_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_migrate_copy), + SUBTEST(live_migrate_clear), SUBTEST(thread_migrate_copy), + SUBTEST(thread_migrate_clear), SUBTEST(thread_global_copy), + SUBTEST(thread_global_clear), }; struct intel_migrate m; int err; -- cgit v1.2.3 From 94ce0d65076cda511da843ae1893c819948a215a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jun 2021 08:30:15 +0200 Subject: drm/i915/gt: Setup a default 
migration context on the GT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set up a default migration context on the GT and use it from the selftests. Add a perf selftest and make sure we exercise LMEM if available. Signed-off-by: Chris Wilson Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-10-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 4 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 + drivers/gpu/drm/i915/gt/intel_migrate.c | 2 + drivers/gpu/drm/i915/gt/selftest_migrate.c | 237 ++++++++++++++++++++- .../gpu/drm/i915/selftests/i915_perf_selftests.h | 1 + 5 files changed, 236 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2161bf01ef8b..67ef057ae918 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -13,6 +13,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_migrate.h" #include "intel_mocs.h" #include "intel_rc6.h" #include "intel_renderstate.h" @@ -626,6 +627,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; + intel_migrate_init(>->migrate, gt); + goto out_fw; err_gt: __intel_gt_disable(gt); @@ -649,6 +652,7 @@ void intel_gt_driver_remove(struct intel_gt *gt) { __intel_gt_disable(gt); + intel_migrate_fini(>->migrate); intel_uc_driver_remove(>->uc); intel_engines_release(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index fecfacf551d5..7450935f2ca8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -24,6 +24,7 @@ #include "intel_reset_types.h" #include "intel_rc6_types.h" #include "intel_rps_types.h" +#include "intel_migrate_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -145,6 +146,8 @@ struct intel_gt { struct i915_vma *scratch; + struct intel_migrate migrate; + struct intel_gt_info { intel_engine_mask_t engine_mask; u8 num_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index ba4009120b33..23c59ce66cee 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -418,6 +418,7 @@ intel_context_migrate_copy(struct intel_context *ce, struct i915_request *rq; int err; + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); *out = NULL; GEM_BUG_ON(ce->ring->size < SZ_64K); @@ -536,6 +537,7 @@ intel_context_migrate_clear(struct intel_context *ce, struct i915_request *rq; int err; + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); *out = NULL; GEM_BUG_ON(ce->ring->size < SZ_64K); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 159c8656e1b0..12ef2837c89b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -3,6 +3,8 @@ * Copyright © 2020 Intel Corporation */ +#include + #include "selftests/i915_random.h" static const unsigned int sizes[] = { @@ -18,13 +20,11 @@ static const unsigned int sizes[] = { static struct drm_i915_gem_object * create_lmem_or_internal(struct drm_i915_private *i915, size_t size) { - if (HAS_LMEM(i915)) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj; - obj = 
i915_gem_object_create_lmem(i915, size, 0); - if (!IS_ERR(obj)) - return obj; - } + obj = i915_gem_object_create_lmem(i915, size, 0); + if (!IS_ERR(obj)) + return obj; return i915_gem_object_create_internal(i915, size); } @@ -441,14 +441,229 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915) SUBTEST(thread_global_copy), SUBTEST(thread_global_clear), }; - struct intel_migrate m; + struct intel_gt *gt = &i915->gt; + + if (!gt->migrate.context) + return 0; + + return i915_subtests(tests, >->migrate); +} + +static struct drm_i915_gem_object * +create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) +{ + struct drm_i915_gem_object *obj = NULL; int err; - if (intel_migrate_init(&m, &i915->gt)) + if (try_lmem) + obj = i915_gem_object_create_lmem(gt->i915, sz, 0); + + if (IS_ERR_OR_NULL(obj)) { + obj = i915_gem_object_create_internal(gt->i915, sz); + if (IS_ERR(obj)) + return obj; + } + + i915_gem_object_trylock(obj); + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return obj; +} + +static int wrap_ktime_compare(const void *A, const void *B) +{ + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_clear_blt(struct intel_context *ce, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + size_t sz) +{ + ktime_t t[5]; + int pass; + int err = 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct i915_request *rq; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = intel_context_migrate_clear(ce, NULL, sg, cache_level, + is_lmem, 0, &rq); + if (rq) { + if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) + err = -EIO; + i915_request_put(rq); + } + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: %zd KiB fill: %lld MiB/s\n", + ce->engine->name, sz >> 10, + div64_u64(mul_u32_u32(4 * sz, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + return 0; +} + +static int perf_clear_blt(void *arg) +{ + struct intel_gt *gt = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *dst; + int err; + + dst = create_init_lmem_internal(gt, sizes[i], true); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + err = __perf_clear_blt(gt->migrate.context, + dst->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(dst), + sizes[i]); + + i915_gem_object_unlock(dst); + i915_gem_object_put(dst); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct intel_context *ce, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + size_t sz) +{ + ktime_t t[5]; + int pass; + int err = 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct i915_request *rq; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = intel_context_migrate_copy(ce, NULL, + src, src_cache_level, + src_is_lmem, + dst, dst_cache_level, + dst_is_lmem, + &rq); + if (rq) { + if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) + err = -EIO; + i915_request_put(rq); + } + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: %zd KiB copy: 
%lld MiB/s\n", + ce->engine->name, sz >> 10, + div64_u64(mul_u32_u32(4 * sz, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + return 0; +} + +static int perf_copy_blt(void *arg) +{ + struct intel_gt *gt = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = create_init_lmem_internal(gt, sizes[i], true); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = create_init_lmem_internal(gt, sizes[i], false); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(gt->migrate.context, + src->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(dst), + sizes[i]); + + i915_gem_object_unlock(dst); + i915_gem_object_put(dst); +err_src: + i915_gem_object_unlock(src); + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + +int intel_migrate_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_clear_blt), + SUBTEST(perf_copy_blt), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) return 0; - err = i915_subtests(tests, &m); - intel_migrate_fini(&m); + if (!gt->migrate.context) + return 0; - return err; + return intel_gt_live_subtests(tests, gt); } diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index c2389f8a257d..5077dc3c3b8c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -17,5 +17,6 @@ */ selftest(engine_cs, intel_engine_cs_perf_selftests) selftest(request, i915_request_perf_selftests) +selftest(migrate, intel_migrate_perf_selftests) selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests) -- cgit v1.2.3 From 50331a7b50741035cc9335f863939d638b225e71 Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Thu, 17 Jun 2021 08:30:16 +0200 Subject: drm/i915/ttm: accelerated move implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invokes the pipelined page migration through blt, for i915_ttm_move requests of eviction and also obj clear. 
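As a rough sketch of the intended flow (illustrative only, not a hunk from this patch: the sketch_ttm_move() name is made up, the helpers are the ones added below, and locking plus error handling are elided), the move path tries the pipelined blt migration on the GT migration context first and keeps the existing memcpy path as the fallback:

	static void sketch_ttm_move(struct ttm_buffer_object *bo,
				    struct ttm_resource *dst_mem,
				    struct sg_table *dst_st,
				    struct ttm_kmap_iter *dst_iter,
				    struct ttm_kmap_iter *src_iter)
	{
		/* Prefer the pipelined blt clear/copy on gt->migrate.context. */
		if (i915_ttm_accel_move(bo, dst_mem, dst_st))
			/* No migration context or unsupported object: memcpy. */
			ttm_move_memcpy(bo, dst_mem->num_pages,
					dst_iter, src_iter);

		ttm_bo_move_sync_cleanup(bo, dst_mem);
	}
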
Signed-off-by: Ramalingam C Reviewed-by: Thomas Hellström Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-11-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 88 ++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 9366b18d1bc6..df46535cca47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -15,6 +15,9 @@ #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_mman.h" +#include "gt/intel_migrate.h" +#include "gt/intel_engine_pm.h" + #define I915_PL_LMEM0 TTM_PL_PRIV #define I915_PL_SYSTEM TTM_PL_SYSTEM #define I915_PL_STOLEN TTM_PL_VRAM @@ -326,6 +329,62 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return intel_region_ttm_resource_to_st(obj->mm.region, res); } +static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct ttm_resource_manager *src_man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct sg_table *src_st; + struct i915_request *rq; + int ret; + + if (!i915->gt.migrate.context) + return -EINVAL; + + if (!bo->ttm || !ttm_tt_is_populated(bo->ttm)) { + if (bo->type == ttm_bo_type_kernel) + return -EINVAL; + + if (bo->ttm && + !(bo->ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + return 0; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, + dst_st->sgl, I915_CACHE_NONE, + dst_mem->mem_type >= I915_PL_LMEM0, + 0, &rq); + + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } else { + src_st = src_man->use_tt ? i915_ttm_tt_get_st(bo->ttm) : + obj->ttm.cached_io_st; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_copy(i915->gt.migrate.context, + NULL, src_st->sgl, I915_CACHE_NONE, + bo->resource->mem_type >= I915_PL_LMEM0, + dst_st->sgl, I915_CACHE_NONE, + dst_mem->mem_type >= I915_PL_LMEM0, + &rq); + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } + + return ret; +} + static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_resource *dst_mem, @@ -376,19 +435,22 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = dst_man->use_tt ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); - - src_iter = src_man->use_tt ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + ret = i915_ttm_accel_move(bo, dst_mem, dst_st); + if (ret) { + /* If we start mapping GGTT, we can no longer use man::use_tt here. */ + dst_iter = dst_man->use_tt ? 
+ ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = src_man->use_tt ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_free_cached_io_st(obj); -- cgit v1.2.3 From 57143f2e5b41f3e51b13d3c358e29a932334110d Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 17 Jun 2021 08:30:17 +0200 Subject: drm/i915/gem: Zap the client blt code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not used anywhere. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-12-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gem/i915_gem_client_blt.c | 355 --------------------- drivers/gpu/drm/i915/gem/i915_gem_client_blt.h | 21 -- .../drm/i915/gem/selftests/i915_gem_client_blt.c | 114 +------ 4 files changed, 2 insertions(+), 490 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gem/i915_gem_client_blt.c delete mode 100644 drivers/gpu/drm/i915/gem/i915_gem_client_blt.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5e10e0628c56..ffa14084432c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -136,7 +136,6 @@ i915-y += $(gt-y) gem-y += \ gem/i915_gem_busy.o \ gem/i915_gem_clflush.o \ - gem/i915_gem_client_blt.o \ gem/i915_gem_context.o \ gem/i915_gem_create.o \ gem/i915_gem_dmabuf.o \ @@ -281,6 +280,7 @@ i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ + gem/selftests/i915_gem_client_blt.o \ gem/selftests/igt_gem_utils.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c deleted file mode 100644 index 44821d94544f..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ /dev/null @@ -1,355 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_gem_client_blt.h" -#include "i915_gem_object_blt.h" - -struct i915_sleeve { - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - struct sg_table *pages; - struct i915_page_sizes page_sizes; -}; - -static int vma_set_pages(struct i915_vma *vma) -{ - struct i915_sleeve *sleeve = vma->private; - - vma->pages = sleeve->pages; - vma->page_sizes = sleeve->page_sizes; - - return 0; -} - -static void vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - vma->pages = NULL; -} - -static void vma_bind(struct i915_address_space *vm, - struct i915_vm_pt_stash *stash, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); -} - -static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) -{ - vm->vma_ops.unbind_vma(vm, vma); -} - -static const struct i915_vma_ops proxy_vma_ops = { - .set_pages = vma_set_pages, - .clear_pages = vma_clear_pages, - .bind_vma = vma_bind, - 
.unbind_vma = vma_unbind, -}; - -static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, - struct drm_i915_gem_object *obj, - struct sg_table *pages, - struct i915_page_sizes *page_sizes) -{ - struct i915_sleeve *sleeve; - struct i915_vma *vma; - int err; - - sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL); - if (!sleeve) - return ERR_PTR(-ENOMEM); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_free; - } - - vma->private = sleeve; - vma->ops = &proxy_vma_ops; - - sleeve->vma = vma; - sleeve->pages = pages; - sleeve->page_sizes = *page_sizes; - - return sleeve; - -err_free: - kfree(sleeve); - return ERR_PTR(err); -} - -static void destroy_sleeve(struct i915_sleeve *sleeve) -{ - kfree(sleeve); -} - -struct clear_pages_work { - struct dma_fence dma; - struct dma_fence_cb cb; - struct i915_sw_fence wait; - struct work_struct work; - struct irq_work irq_work; - struct i915_sleeve *sleeve; - struct intel_context *ce; - u32 value; -}; - -static const char *clear_pages_work_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *clear_pages_work_timeline_name(struct dma_fence *fence) -{ - return "clear"; -} - -static void clear_pages_work_release(struct dma_fence *fence) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), dma); - - destroy_sleeve(w->sleeve); - - i915_sw_fence_fini(&w->wait); - - BUILD_BUG_ON(offsetof(typeof(*w), dma)); - dma_fence_free(&w->dma); -} - -static const struct dma_fence_ops clear_pages_work_ops = { - .get_driver_name = clear_pages_work_driver_name, - .get_timeline_name = clear_pages_work_timeline_name, - .release = clear_pages_work_release, -}; - -static void clear_pages_signal_irq_worker(struct irq_work *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), irq_work); - - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); -} - -static void clear_pages_dma_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct clear_pages_work *w = container_of(cb, typeof(*w), cb); - - if (fence->error) - dma_fence_set_error(&w->dma, fence->error); - - /* - * Push the signalling of the fence into yet another worker to avoid - * the nightmare locking around the fence spinlock. 
- */ - irq_work_queue(&w->irq_work); -} - -static void clear_pages_worker(struct work_struct *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_gem_object *obj = w->sleeve->vma->obj; - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - int err = w->dma.error; - - if (unlikely(err)) - goto out_signal; - - if (obj->cache_dirty) { - if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(w->sleeve->pages); - obj->cache_dirty = false; - } - obj->read_domains = I915_GEM_GPU_DOMAINS; - obj->write_domain = 0; - - i915_gem_ww_ctx_init(&ww, false); - intel_engine_pm_get(w->ce->engine); -retry: - err = intel_context_pin_ww(w->ce, &ww); - if (err) - goto out_signal; - - batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_ctx; - } - - rq = i915_request_create(w->ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - /* There's no way the fence has signalled */ - if (dma_fence_add_callback(&rq->fence, &w->cb, - clear_pages_dma_fence_cb)) - GEM_BUG_ON(1); - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - /* - * w->dma is already exported via (vma|obj)->resv we need only - * keep track of the GPU activity within this vma/request, and - * propagate the signal from the request to w->dma. - */ - err = __i915_vma_move_to_active(vma, rq); - if (err) - goto out_request; - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -out_request: - if (unlikely(err)) { - i915_request_set_error_once(rq, err); - err = 0; - } - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(w->ce, batch); -out_ctx: - intel_context_unpin(w->ce); -out_signal: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - i915_vma_unpin(w->sleeve->vma); - intel_engine_pm_put(w->ce->engine); - - if (unlikely(err)) { - dma_fence_set_error(&w->dma, err); - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); - } -} - -static int pin_wait_clear_pages_work(struct clear_pages_work *w, - struct intel_context *ce) -{ - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - int err; - - i915_gem_ww_ctx_init(&ww, false); -retry: - err = i915_gem_object_lock(vma->obj, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out; - - err = i915_sw_fence_await_reservation(&w->wait, - vma->obj->base.resv, NULL, - true, 0, I915_FENCE_GFP); - if (err) - goto err_unpin_vma; - - dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); - -err_unpin_vma: - if (err) - i915_vma_unpin(vma); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - return err; -} - -static int __i915_sw_fence_call -clear_pages_work_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), wait); - - switch (state) { - case FENCE_COMPLETE: - schedule_work(&w->work); - break; - - case FENCE_FREE: - dma_fence_put(&w->dma); - break; - } - - return NOTIFY_DONE; -} - -static DEFINE_SPINLOCK(fence_lock); - -/* XXX: better name please */ -int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value) -{ - struct clear_pages_work *work; - struct i915_sleeve *sleeve; - int err; - - sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); - if (IS_ERR(sleeve)) - return PTR_ERR(sleeve); - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - destroy_sleeve(sleeve); - return -ENOMEM; - } - - work->value = value; - work->sleeve = sleeve; - work->ce = ce; - - INIT_WORK(&work->work, clear_pages_worker); - - init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - - dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); - i915_sw_fence_init(&work->wait, clear_pages_work_notify); - - err = pin_wait_clear_pages_work(work, ce); - if (err < 0) - dma_fence_set_error(&work->dma, err); - - dma_fence_get(&work->dma); - i915_sw_fence_commit(&work->wait); - - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_client_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h deleted file mode 100644 index 3dbd28c22ff5..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ -#ifndef __I915_GEM_CLIENT_BLT_H__ -#define __I915_GEM_CLIENT_BLT_H__ - -#include - -struct drm_i915_gem_object; -struct i915_page_sizes; -struct intel_context; -struct sg_table; - -int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value); - -#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 176e6b22f87f..ecbcbb86ae1e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,7 @@ #include "i915_selftest.h" +#include "gt/intel_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gpu_commands.h" @@ -16,118 +17,6 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int __igt_client_fill(struct intel_engine_cs *engine) -{ - struct intel_context *ce = engine->kernel_context; - struct drm_i915_gem_object *obj; - I915_RND_STATE(prng); - IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; - - intel_engine_pm_get(engine); - do { - const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); - u32 i; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * XXX: The goal is move this to get_pages, so try to dirty the - * CPU cache first to check that we do the required clflush - * before scheduling the blt for !llc platforms. This matches - * some version of reality where at get_pages the pages - * themselves may not yet be coherent with the GPU(swap-in). 
If - * we are missing the flush then we should see the stale cache - * values after we do the set_to_cpu_domain and pick it up as a - * test failure. - */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages, - &obj->mm.page_sizes, - val); - if (err) - goto err_unpin; - - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - intel_engine_pm_put(engine); - - return err; -} - -static int igt_client_fill(void *arg) -{ - int inst = 0; - - do { - struct intel_engine_cs *engine; - int err; - - engine = intel_engine_lookup_user(arg, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - err = __igt_client_fill(engine); - if (err == -ENOMEM) - err = 0; - if (err) - return err; - } while (1); -} - #define WIDTH 512 #define HEIGHT 32 @@ -693,7 +582,6 @@ static int igt_client_tiled_blits(void *arg) int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(igt_client_fill), SUBTEST(igt_client_tiled_blits), }; -- cgit v1.2.3 From 99919be74aa37537850a7b453f2a3e3260e0d50e Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 17 Jun 2021 08:30:18 +0200 Subject: drm/i915/gem: Zap the i915_gem_object_blt code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unused with the exception of selftest. Replace a call in the memory_region live selftest with a call into a corresponding function in the new migrate code. 
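In rough terms, the conversion swaps the object-blt fill for a clear on the GT migration context. A minimal sketch follows (the sketch_clear_with_migrate() wrapper is hypothetical; the calls mirror the intel_memory_region.c hunk below, with locking and error paths elided):

	static int sketch_clear_with_migrate(struct intel_gt *gt,
					     struct drm_i915_gem_object *obj,
					     u32 value)
	{
		struct i915_request *rq = NULL;
		int err;

		/* Previously: i915_gem_object_fill_blt(obj, ce, value). */
		err = intel_context_migrate_clear(gt->migrate.context, NULL,
						  obj->mm.pages->sgl,
						  I915_CACHE_NONE,
						  i915_gem_object_is_lmem(obj),
						  value, &rq);
		if (rq) {
			/* Track the blt clear on the object, then drop our request reference. */
			dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
			i915_request_put(rq);
		}

		return err;
	}
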
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210617063018.92802-13-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 - drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 461 ---------------- drivers/gpu/drm/i915/gem/i915_gem_object_blt.h | 39 -- .../drm/i915/gem/selftests/i915_gem_object_blt.c | 597 --------------------- .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 - .../gpu/drm/i915/selftests/i915_perf_selftests.h | 1 - .../gpu/drm/i915/selftests/intel_memory_region.c | 21 +- 7 files changed, 14 insertions(+), 1107 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_blt.c delete mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_blt.h delete mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index ffa14084432c..01f28ad5ea57 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -143,7 +143,6 @@ gem-y += \ gem/i915_gem_execbuffer.o \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ - gem/i915_gem_object_blt.o \ gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c deleted file mode 100644 index 3e28c68fda3e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_buffer_pool.h" -#include "gt/intel_ring.h" -#include "i915_gem_clflush.h" -#include "i915_gem_object_blt.h" - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 offset; - u64 count; - u64 rem; - u32 size; - u32 *cmd; - int err; - - GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(vma->size, block_size), block_size); - size = (1 + 8 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = vma->size; - offset = vma->node.start; - - do { - u32 size = min_t(u64, rem, block_size); - - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = 
lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = offset; - *cmd++ = value; - } - - /* Allow ourselves to be preempted in between blocks. */ - *cmd++ = MI_ARB_CHECK; - - offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) -{ - int err; - - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, 0); - if (unlikely(err)) - return err; - - return intel_gt_buffer_pool_mark_active(vma->private, rq); -} - -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) -{ - i915_vma_unpin(vma); - intel_gt_buffer_pool_put(vma->private); - intel_engine_pm_put(ce->engine); -} - -static int -move_obj_to_gpu(struct drm_i915_gem_object *obj, - struct i915_request *rq, - bool write) -{ - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - - return i915_request_await_object(rq, obj, write); -} - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value) -{ - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, ce->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(obj, &ww); - if (err) - goto out; - - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - batch = intel_emit_vma_fill_blt(ce, vma, &ww, value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_vma; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - err = move_obj_to_gpu(vma->obj, rq, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (unlikely(err)) - goto out_request; - - if (ce->engine->emit_init_breadcrumb) - err = ce->engine->emit_init_breadcrumb(rq); - - if (likely(!err)) - err = ce->engine->emit_bb_start(rq, - batch->node.start, - batch->node.size, - 0); -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_vma: - i915_vma_unpin(vma); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -/* Wa_1209644611:icl,ehl */ -static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) -{ - u32 height = size >> PAGE_SHIFT; - - if (GRAPHICS_VER(i915) != 11) - return false; - - return height % 4 == 3 && height <= 8; -} - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx 
*ww, - struct i915_vma *src, - struct i915_vma *dst) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 src_offset, dst_offset; - u64 count, rem; - u32 size, *cmd; - int err; - - GEM_BUG_ON(src->size != dst->size); - - GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(dst->size, block_size), block_size); - size = (1 + 11 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = src->size; - src_offset = src->node.start; - dst_offset = dst->node.start; - - do { - size = min_t(u64, rem, block_size); - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 9 && - !wa_1209644611_applies(i915, size)) { - *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); - *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else { - *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; - *cmd++ = dst_offset; - *cmd++ = PAGE_SIZE; - *cmd++ = src_offset; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - src_offset += size; - dst_offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce) -{ - struct i915_address_space *vm = ce->vm; - struct i915_vma *vma[2], *batch; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - int err, i; - - vma[0] = i915_vma_instance(src, vm, NULL); - if (IS_ERR(vma[0])) - return PTR_ERR(vma[0]); - - vma[1] = i915_vma_instance(dst, vm, NULL); - if (IS_ERR(vma[1])) - return PTR_ERR(vma[1]); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(src, &ww); - if (!err) - err = i915_gem_object_lock(dst, &ww); - if (!err) - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_unpin_src; - - batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unpin_dst; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - err = move_obj_to_gpu(vma[i]->obj, rq, i); - if (unlikely(err)) - goto out_request; - } - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - unsigned int flags = i ? 
EXEC_OBJECT_WRITE : 0; - - err = i915_vma_move_to_active(vma[i], rq, flags); - if (unlikely(err)) - goto out_request; - } - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_unpin_dst: - i915_vma_unpin(vma[1]); -out_unpin_src: - i915_vma_unpin(vma[0]); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_object_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h deleted file mode 100644 index 2409fdcccf0e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_OBJECT_BLT_H__ -#define __I915_GEM_OBJECT_BLT_H__ - -#include - -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_vma.h" - -struct drm_i915_gem_object; -struct i915_gem_ww_ctx; - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value); - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst); - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value); - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce); - -#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c deleted file mode 100644 index 8c335d1a8406..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ /dev/null @@ -1,597 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include - -#include "gt/intel_gt.h" -#include "gt/intel_engine_user.h" - -#include "i915_selftest.h" - -#include "gem/i915_gem_context.h" -#include "selftests/igt_flush_test.h" -#include "selftests/i915_random.h" -#include "selftests/mock_drm.h" -#include "huge_gem_object.h" -#include "mock_context.h" - -static int wrap_ktime_compare(const void *A, const void *B) -{ - const ktime_t *a = A, *b = B; - - return ktime_compare(*a, *b); -} - -static int __perf_fill_blt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_fill_blt(obj, ce, 0); - if (err) - break; - - err = 
i915_gem_object_wait(obj, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", - engine->name, - obj->base.size >> 10, - div64_u64(mul_u32_u32(4 * obj->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_fill_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = __perf_fill_blt(obj); - i915_gem_object_put(obj); - if (err) - return err; - } - - return 0; -} - -static int __perf_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst) -{ - struct drm_i915_private *i915 = to_i915(src->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err = 0; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - break; - - err = i915_gem_object_wait(dst, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", - engine->name, - src->base.size >> 10, - div64_u64(mul_u32_u32(4 * src->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_copy_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *src, *dst; - int err; - - src = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(src)) - return PTR_ERR(src); - - dst = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_src; - } - - err = __perf_copy_blt(src, dst); - - i915_gem_object_put(dst); -err_src: - i915_gem_object_put(src); - if (err) - return err; - } - - return 0; -} - -struct igt_thread_arg { - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct file *file; - struct rnd_state prng; - unsigned int n_cpus; -}; - -static int igt_fill_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, 
like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! */ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * Make sure the potentially async clflush does its job, if - * required. - */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_object_fill_blt(obj, ce, val); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) { - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_copy_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *src, *dst; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! 
*/ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - src = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(src)) { - err = PTR_ERR(src); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_src; - } - - memset32(vaddr, val, - huge_gem_object_phys_size(src) / sizeof(u32)); - - i915_gem_object_unpin_map(src); - - if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - src->cache_dirty = true; - - dst = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_put_src; - } - - vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_dst; - } - - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(dst) / sizeof(u32)); - - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - dst->cache_dirty = true; - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) { - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(dst); - - i915_gem_object_put(src); - i915_gem_object_put(dst); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(dst); -err_put_dst: - i915_gem_object_put(dst); -err_put_src: - i915_gem_object_put(src); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_threaded_blt(struct intel_engine_cs *engine, - int (*blt_fn)(void *arg), - unsigned int flags) -#define SINGLE_CTX BIT(0) -{ - struct igt_thread_arg *thread; - struct task_struct **tsk; - unsigned int n_cpus, i; - I915_RND_STATE(prng); - int err = 0; - - n_cpus = num_online_cpus() + 1; - - tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); - if (!tsk) - return 0; - - thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); - if (!thread) - goto out_tsk; - - thread[0].file = mock_file(engine->i915); - if (IS_ERR(thread[0].file)) { - err = PTR_ERR(thread[0].file); - goto out_thread; - } - - if (flags & SINGLE_CTX) { - thread[0].ctx = live_context_for_engine(engine, thread[0].file); - if (IS_ERR(thread[0].ctx)) { - err = PTR_ERR(thread[0].ctx); - goto out_file; - } - } - - for (i = 0; i < n_cpus; ++i) { - thread[i].engine = engine; - thread[i].file = thread[0].file; - thread[i].ctx = thread[0].ctx; - thread[i].n_cpus = n_cpus; - thread[i].prng = - I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); - - tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); - if (IS_ERR(tsk[i])) { - err = PTR_ERR(tsk[i]); - break; - } - - get_task_struct(tsk[i]); - } - - yield(); /* start all threads before we kthread_stop() */ - - for (i = 0; i < n_cpus; ++i) { - int status; - - if 
(IS_ERR_OR_NULL(tsk[i])) - continue; - - status = kthread_stop(tsk[i]); - if (status && !err) - err = status; - - put_task_struct(tsk[i]); - } - -out_file: - fput(thread[0].file); -out_thread: - kfree(thread); -out_tsk: - kfree(tsk); - return err; -} - -static int test_copy_engines(struct drm_i915_private *i915, - int (*fn)(void *arg), - unsigned int flags) -{ - struct intel_engine_cs *engine; - int ret; - - for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) { - ret = igt_threaded_blt(engine, fn, flags); - if (ret) - return ret; - } - - return 0; -} - -static int igt_fill_blt(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, 0); -} - -static int igt_fill_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX); -} - -static int igt_copy_blt(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, 0); -} - -static int igt_copy_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX); -} - -int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_fill_blt), - SUBTEST(igt_fill_blt_ctx0), - SUBTEST(igt_copy_blt), - SUBTEST(igt_copy_blt_ctx0), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} - -int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(perf_fill_blt), - SUBTEST(perf_copy_blt), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index be5e0191eaea..a68197cf1044 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -39,7 +39,6 @@ selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) -selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) selftest(memory_region, intel_memory_region_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index 5077dc3c3b8c..058450d351f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -18,5 +18,4 @@ selftest(engine_cs, intel_engine_cs_perf_selftests) selftest(request, i915_request_perf_selftests) selftest(migrate, intel_migrate_perf_selftests) -selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 118a66c29695..ecc3b9e6c22b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -15,12 +15,13 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" -#include "gem/i915_gem_object_blt.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "i915_buddy.h" +#include "gt/intel_migrate.h" 
#include "i915_memcpy.h" #include "i915_ttm_buddy_manager.h" #include "selftests/igt_flush_test.h" @@ -808,6 +809,7 @@ static int igt_lmem_write_cpu(void *arg) PAGE_SIZE - 64, }; struct intel_engine_cs *engine; + struct i915_request *rq; u32 *vaddr; u32 sz; u32 i; @@ -834,15 +836,20 @@ static int igt_lmem_write_cpu(void *arg) goto out_put; } + i915_gem_object_lock(obj, NULL); /* Put the pages into a known state -- from the gpu for added fun */ intel_engine_pm_get(engine); - err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); - intel_engine_pm_put(engine); - if (err) - goto out_unpin; + err = intel_context_migrate_clear(engine->gt->migrate.context, NULL, + obj->mm.pages->sgl, I915_CACHE_NONE, + true, 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_wc_domain(obj, true); + intel_engine_pm_put(engine); + if (!err) + err = i915_gem_object_set_to_wc_domain(obj, true); i915_gem_object_unlock(obj); if (err) goto out_unpin; -- cgit v1.2.3 From 5cd57f676bb946a00275408f0dd0d75dbc466d25 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 15 Jun 2021 13:36:00 +0200 Subject: drm/i915: Perform execbuffer object locking as a separate step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To help avoid evicting already resident buffers from the batch we're processing, perform locking as a separate step. Signed-off-by: Thomas Hellström Reviewed-by: Ramalingam C Reviewed-by: Maarten Lankhorst Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210615113600.30660-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 201fed19d120..394eb40c95b5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -922,21 +922,38 @@ err: return err; } -static int eb_validate_vmas(struct i915_execbuffer *eb) +static int eb_lock_vmas(struct i915_execbuffer *eb) { unsigned int i; int err; - INIT_LIST_HEAD(&eb->unbound); - for (i = 0; i < eb->buffer_count; i++) { - struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; err = i915_gem_object_lock(vma->obj, &eb->ww); if (err) return err; + } + + return 0; +} + +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + err = eb_lock_vmas(eb); + if (err) + return err; + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; err = eb_pin_vma(eb, entry, ev); if (err == -EDEADLK) -- cgit v1.2.3 From 6796c772850574ec0a9adc977e9889606b23d0f4 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 15 Jun 2021 19:35:20 +0800 Subject: drm/i915: Remove duplicate include of intel_region_lmem.h Fix the following checkinclude.pl warning: drivers/gpu/drm/i915/gt/intel_region_lmem.c 8 #include "intel_region_lmem.h" 12 #include "intel_region_lmem.h" Signed-off-by: Wan Jiabing Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210615113522.6867-1-wanjiabing@vivo.com --- 
drivers/gpu/drm/i915/gt/intel_region_lmem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 4ae1f717a94c..1f43aba2e9e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -10,7 +10,6 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" -#include "intel_region_lmem.h" static int init_fake_lmem_bar(struct intel_memory_region *mem) { -- cgit v1.2.3 From 932641f0323eec3fc42dfd303eb033f5dfa08e74 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Jun 2021 14:14:23 -0700 Subject: drm/i915: extract steered reg access to common function New steering cases will be added in the follow-up patches, so prepare a common helper to avoid code duplication. Cc: Tvrtko Ursulin Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210617211425.1943662-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 41 ++--------------------- drivers/gpu/drm/i915/intel_uncore.c | 55 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uncore.h | 6 ++++ 3 files changed, 63 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index fcbaad18ac91..04c1f5b9ce71 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1112,45 +1112,8 @@ static u32 read_subslice_reg(const struct intel_engine_cs *engine, int slice, int subslice, i915_reg_t reg) { - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 mcr_mask, mcr_ss, mcr, old_mcr, val; - enum forcewake_domains fw_domains; - - if (GRAPHICS_VER(i915) >= 11) { - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - } else { - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; - mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - } - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, - FW_REG_READ); - fw_domains |= intel_uncore_forcewake_for_reg(uncore, - GEN8_MCR_SELECTOR, - FW_REG_READ | FW_REG_WRITE); - - spin_lock_irq(&uncore->lock); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - val = intel_uncore_read_fw(uncore, reg); - - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; - - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); - - return val; + return intel_uncore_read_with_mcr_steering(engine->uncore, reg, + slice, subslice); } /* NB: please notice the memset */ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1bed8f666048..d067524f9162 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2277,6 +2277,61 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore, return fw_domains; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice) +{ + u32 mcr_mask, mcr_ss, mcr, old_mcr, val; + + 
lockdep_assert_held(&uncore->lock); + + if (GRAPHICS_VER(uncore->i915) >= 11) { + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + } + + old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + val = intel_uncore_read_fw(uncore, reg); + + mcr &= ~mcr_mask; + mcr |= old_mcr & mcr_mask; + + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + return val; +} + +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + enum forcewake_domains fw_domains; + u32 val; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + FW_REG_READ); + fw_domains |= intel_uncore_forcewake_for_reg(uncore, + GEN8_MCR_SELECTOR, + FW_REG_READ | FW_REG_WRITE); + + spin_lock_irq(&uncore->lock); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + val = intel_uncore_read_with_mcr_steering_fw(uncore, reg, slice, subslice); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irq(&uncore->lock); + + return val; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 59f0da8f1fbb..a18bdb57af7b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -182,6 +182,12 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice); +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice); + void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, -- cgit v1.2.3 From 0957e931df647511ecaf43883a7031004036f90d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 17 Jun 2021 14:14:24 -0700 Subject: drm/i915: Add GT support for multiple types of multicast steering Although most of our multicast registers are replicated per-subslice, we also have a small number of multicast registers that are replicated per-l3 bank instead. For both types of multicast registers we need to make sure we steer reads of these registers to a valid instance. Ideally we'd like to find a specific instance ID that would steer reads of either type of multicast register to a valid instance (i.e., not fused off and not powered down), but sometimes the combination of part-specific fusing and the additional restrictions imposed by Render Power Gating make it impossible to find any overlap between the set of valid subslices and valid l3 banks. This problem will become even more noticeable on our upcoming platforms since they will be adding additional types of multicast registers with new types of replication and rules for finding valid instances for reads. To handle this we'll continue to pick a suitable subslice instance at driver startup and program this as the default (sliceid,subsliceid) setting in the steering control register (0xFDC). 
In cases where we need to read another type of multicast GT register, but the default subslice steering would not correspond to a valid instance, we'll explicitly re-steer the single read to a valid value, perform the read, and then reset the steering to it's "subslice" default. This patch adds the general functionality to prepare for this explicit steering of other multicast register types. We'll plug L3 bank steering into this in the next patch, and then add additional types of multicast registers when the support for our next upcoming platform arrives. v2: - Use entry->end==0 as table terminator. (Rodrigo) - Grab forcewake in wa_list_verify() now that we're using accessors that assume forcewake is already held. v3: - Fix loop condition when iterating over steering range tables. (Rodrigo) Cc: Rodrigo Vivi Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210617211425.1943662-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 84 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt.h | 8 +++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 22 +++++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 39 ++++++++---- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 +- 5 files changed, 142 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 67ef057ae918..1c7ca7a090ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -701,6 +701,90 @@ void intel_gt_driver_late_release(struct intel_gt *gt) intel_engines_free(gt); } +/** + * intel_gt_reg_needs_read_steering - determine whether a register read + * requires explicit steering + * @gt: GT structure + * @reg: the register to check steering requirements for + * @type: type of multicast steering to check + * + * Determines whether @reg needs explicit steering of a specific type for + * reads. + * + * Returns false if @reg does not belong to a register range of the given + * steering type, or if the default (subslice-based) steering IDs are suitable + * for @type steering too. + */ +static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, + i915_reg_t reg, + enum intel_steering_type type) +{ + const u32 offset = i915_mmio_reg_offset(reg); + const struct intel_mmio_range *entry; + + if (likely(!intel_gt_needs_read_steering(gt, type))) + return false; + + for (entry = gt->steering_table[type]; entry->end; entry++) { + if (offset >= entry->start && offset <= entry->end) + return true; + } + + return false; +} + +/** + * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering + * @gt: GT structure + * @type: multicast register type + * @sliceid: Slice ID returned + * @subsliceid: Subslice ID returned + * + * Determines sliceid and subsliceid values that will steer reads + * of a specific multicast register class to a valid value. + */ +static void intel_gt_get_valid_steering(struct intel_gt *gt, + enum intel_steering_type type, + u8 *sliceid, u8 *subsliceid) +{ + switch (type) { + default: + MISSING_CASE(type); + *sliceid = 0; + *subsliceid = 0; + } +} + +/** + * intel_gt_read_register_fw - reads a GT register with support for multicast + * @gt: GT structure + * @reg: register to read + * + * This function will read a GT register. If the register is a multicast + * register, the read will be steered to a valid instance (i.e., one that + * isn't fused off or powered down by power gating). 
+ * + * Returns the value from a valid instance of @reg. + */ +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering_fw(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read_fw(gt->uncore, reg); +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 7ec395cace69..e7aabe0cc5bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -75,6 +75,14 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) return unlikely(test_bit(I915_WEDGED, >->reset.flags)); } +static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, + enum intel_steering_type type) +{ + return gt->steering_table[type]; +} + +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 7450935f2ca8..cabea1966b4e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -32,6 +32,26 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; +struct intel_mmio_range { + u32 start; + u32 end; +}; + +/* + * The hardware has multiple kinds of multicast register ranges that need + * special register steering (and future platforms are expected to add + * additional types). + * + * During driver startup, we initialize the steering control register to + * direct reads to a slice/subslice that are valid for the 'subslice' class + * of multicast registers. If another type of steering does not have any + * overlap in valid steering targets with 'subslice' style registers, we will + * need to explicitly re-steer reads of registers of the other type. + */ +enum intel_steering_type { + NUM_STEERING_TYPES +}; + enum intel_submission_method { INTEL_SUBMISSION_RING, INTEL_SUBMISSION_ELSP, @@ -148,6 +168,8 @@ struct intel_gt { struct intel_migrate migrate; + const struct intel_mmio_range *steering_table[NUM_STEERING_TYPES]; + struct intel_gt_info { intel_engine_mask_t engine_mask; u8 num_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 977a76e648e0..93c74d4cae02 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1248,8 +1248,9 @@ wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) } static void -wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) +wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) { + struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw; unsigned long flags; struct i915_wa *wa; @@ -1264,13 +1265,16 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - if (wa->clr) - intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set); - else - intel_uncore_write_fw(uncore, wa->reg, wa->set); + u32 val, old = 0; + + /* open-coded rmw due to steering */ + old = wa->clr ? 
intel_gt_read_register_fw(gt, wa->reg) : 0; + val = (old & ~wa->clr) | wa->set; + if (val != old || !wa->clr) + intel_uncore_write_fw(uncore, wa->reg, val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, - intel_uncore_read_fw(uncore, wa->reg), + wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg), wal->name, "application"); } @@ -1280,28 +1284,39 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(gt->uncore, >->i915->gt_wa_list); + wa_list_apply(gt, >->i915->gt_wa_list); } -static bool wa_list_verify(struct intel_uncore *uncore, +static bool wa_list_verify(struct intel_gt *gt, const struct i915_wa_list *wal, const char *from) { + struct intel_uncore *uncore = gt->uncore; struct i915_wa *wa; + enum forcewake_domains fw; + unsigned long flags; unsigned int i; bool ok = true; + fw = wal_get_fw_for_rmw(uncore, wal); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ok &= wa_verify(wa, - intel_uncore_read(uncore, wa->reg), + intel_gt_read_register_fw(gt, wa->reg), wal->name, from); + intel_uncore_forcewake_put__locked(uncore, fw); + spin_unlock_irqrestore(&uncore->lock, flags); + return ok; } bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); + return wa_list_verify(gt, >->i915->gt_wa_list, from); } __maybe_unused @@ -2084,7 +2099,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) void intel_engine_apply_workarounds(struct intel_engine_cs *engine) { - wa_list_apply(engine->uncore, &engine->wa_list); + wa_list_apply(engine->gt, &engine->wa_list); } struct mcr_range { diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index c30754daf4b1..7ebc4edb8ecf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -1147,7 +1147,7 @@ verify_wa_lists(struct intel_gt *gt, struct wa_lists *lists, enum intel_engine_id id; bool ok = true; - ok &= wa_list_verify(gt->uncore, &lists->gt_wa_list, str); + ok &= wa_list_verify(gt, &lists->gt_wa_list, str); for_each_engine(engine, gt, id) { struct intel_context *ce; -- cgit v1.2.3 From 3193927421554757e6bee52f9c7e3937edefc589 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 17 Jun 2021 14:14:25 -0700 Subject: drm/i915: Add support for explicit L3BANK steering Because Render Power Gating restricts us to just a single subslice as a valid steering target for reads of multicast registers in a SUBSLICE range, the default steering we setup at init may not lead to a suitable target for L3BANK multicast register. In cases where it does not, use explicit runtime steering whenever an L3BANK multicast register is read. While we're at it, let's simplify the function a little bit and drop its support for gen10/CNL since no such platforms ever materialized for real use. Multicast register steering is already an area that causes enough confusion; no need to complicate it with what's effectively dead code. v2: - Use gt->uncore instead of gt->i915->uncore. (Tvrtko) - Use {} as table terminator. (Rodrigo) v3: - L3bank fuse register is a disable mask rather than an enable mask. We need to invert it before use. (CI) v4: - L3bank ID goes in the subslice field, not the slice field. 
(CI) Cc: Tvrtko Ursulin Cc: Rodrigo Vivi Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210617211425.1943662-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 18 +++++++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 84 +++++++++-------------------- 3 files changed, 46 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 1c7ca7a090ab..e714e21c0a4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -84,6 +84,11 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) gt->ggtt = ggtt; } +static const struct intel_mmio_range icl_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + int intel_gt_init_mmio(struct intel_gt *gt) { intel_gt_init_clock_frequency(gt); @@ -91,6 +96,13 @@ int intel_gt_init_mmio(struct intel_gt *gt) intel_uc_init_mmio(>->uc); intel_sseu_info_init(gt); + if (GRAPHICS_VER(gt->i915) >= 11) { + gt->steering_table[L3BANK] = icl_l3bank_steering_table; + gt->info.l3bank_mask = + ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN10_L3BANK_MASK; + } + return intel_engines_init_mmio(gt); } @@ -748,6 +760,12 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, u8 *sliceid, u8 *subsliceid) { switch (type) { + case L3BANK: + GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */ + + *sliceid = 0; /* unused */ + *subsliceid = __ffs(gt->info.l3bank_mask); + break; default: MISSING_CASE(type); *sliceid = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index cabea1966b4e..d93d578a4105 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -49,6 +49,8 @@ struct intel_mmio_range { * need to explicitly re-steer reads of registers of the other type. */ enum intel_steering_type { + L3BANK, + NUM_STEERING_TYPES }; @@ -177,6 +179,8 @@ struct intel_gt { /* Media engine access to SFC per instance */ u8 vdbox_sfc_access; + u32 l3bank_mask; + /* Slice/subslice/EU info */ struct sseu_dev_info sseu; } info; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 93c74d4cae02..d9a5a445ceec 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -945,71 +945,37 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = &i915->gt.info.sseu; unsigned int slice, subslice; - u32 l3_en, mcr, mcr_mask; + u32 mcr, mcr_mask; - GEM_BUG_ON(GRAPHICS_VER(i915) < 10); + GEM_BUG_ON(GRAPHICS_VER(i915) < 11); + GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); + slice = 0; /* - * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl - * L3Banks could be fused off in single slice scenario. If that is - * the case, we might need to program MCR select to a valid L3Bank - * by default, to make sure we correctly read certain registers - * later on (in the range 0xB100 - 0xB3FF). 
- * - * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl - * Before any MMIO read into slice/subslice specific registers, MCR - * packet control register needs to be programmed to point to any - * enabled s/ss pair. Otherwise, incorrect values will be returned. - * This means each subsequent MMIO read will be forwarded to an - * specific s/ss combination, but this is OK since these registers - * are consistent across s/ss in almost all cases. In the rare - * occasions, such as INSTDONE, where this value is dependent - * on s/ss combo, the read should be done with read_subslice_reg. - * - * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both - * to which subslice, or to which L3 bank, the respective mmio reads - * will go, we have to find a common index which works for both - * accesses. - * - * Case where we cannot find a common index fortunately should not - * happen in production hardware, so we only emit a warning instead of - * implementing something more complex that requires checking the range - * of every MMIO read. + * Although a platform may have subslices, we need to always steer + * reads to the lowest instance that isn't fused off. When Render + * Power Gating is enabled, grabbing forcewake will only power up a + * single subslice (the "minconfig") if there isn't a real workload + * that needs to be run; this means that if we steer register reads to + * one of the higher subslices, we run the risk of reading back 0's or + * random garbage. */ + subslice = __ffs(intel_sseu_get_subslices(sseu, slice)); - if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { - u32 l3_fuse = - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & - GEN10_L3BANK_MASK; - - drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); - l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); - } else { - l3_en = ~0; - } + /* + * If the subslice we picked above also steers us to a valid L3 bank, + * then we can just rely on the default steering and won't need to + * worry about explicitly re-steering L3BANK reads later. 
+ */ + if (i915->gt.info.l3bank_mask & BIT(subslice)) + i915->gt.steering_table[L3BANK] = NULL; - slice = fls(sseu->slice_mask) - 1; - subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); - if (!subslice) { - drm_warn(&i915->drm, - "No common index found between subslice mask %x and L3 bank mask %x!\n", - intel_sseu_get_subslices(sseu, slice), l3_en); - subslice = fls(l3_en); - drm_WARN_ON(&i915->drm, !subslice); - } - subslice--; - - if (GRAPHICS_VER(i915) >= 11) { - mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - } else { - mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; - } + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); @@ -1019,8 +985,6 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) static void cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); - /* WaInPlaceDecompressionHang:cnl */ wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, @@ -1030,7 +994,7 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) static void icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); + icl_wa_init_mcr(i915, wal); /* WaInPlaceDecompressionHang:icl */ wa_write_or(wal, @@ -1112,7 +1076,7 @@ static void gen12_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); + icl_wa_init_mcr(i915, wal); /* Wa_14011060649:tgl,rkl,dg1,adls,adl-p */ wa_14011060649(i915, wal); -- cgit v1.2.3 From b07a6483839a838dc7acff570174053dd544c039 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 18 Jun 2021 15:25:15 +0200 Subject: drm/i915/ttm: Fix incorrect assumptions about ttm_bo_validate() semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have assumed that if the current placement was not the requested placement, but instead one of the busy placements, a TTM move would have been triggered. That is not the case. So when we initially place LMEM objects in "Limbo", (that is system placement without any pages allocated), to be able to defer clearing objects until first get_pages(), the first get_pages() would happily keep objects in system memory if that is one of the allowed placements. And since we don't yet support i915 GEM system memory from TTM, everything breaks apart. So make sure we try the requested placement first, if no eviction is needed. If that fails, retry with all allowed placements also allowing evictions. Also make sure we handle TTM failure codes correctly. Also temporarily (until we support i915 GEM system on TTM), restrict allowed placements to the requested placement to avoid things falling apart should LMEM be full. 
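Condensed, the retry flow described above looks roughly like the sketch below. It is illustrative only: the helper name try_validate_two_pass and its num_allowed parameter are invented for this note, while ttm_bo_validate(), struct ttm_placement and struct ttm_operation_ctx are the real TTM interfaces the patch uses (header paths assumed to be the v5.13-era ones).

    #include <drm/ttm/ttm_bo_api.h>
    #include <drm/ttm/ttm_placement.h>

    /* Sketch of the two-pass validate flow, not the patch itself. */
    static int try_validate_two_pass(struct ttm_buffer_object *bo,
                                     struct ttm_placement *placement,
                                     struct ttm_operation_ctx *ctx,
                                     unsigned int num_allowed)
    {
            int ret;

            /* Pass 1: the requested placement only, with no eviction. */
            placement->num_busy_placement = 0;
            ret = ttm_bo_validate(bo, placement, ctx);
            if (!ret)
                    return 0;

            /* Restartable errors are handed back to the caller untouched. */
            if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS)
                    return ret;

            /* Pass 2: all accepted placements, evicting if necessary. */
            placement->num_busy_placement = num_allowed;
            return ttm_bo_validate(bo, placement, ctx);
    }

On top of this, the patch below translates the raw TTM codes via i915_ttm_err_to_gem() so callers see -EAGAIN (restart) and -ENXIO (region full) rather than -EBUSY and -ENOSPC.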
Fixes: 38f28c0695c0 ("drm/i915/ttm: Calculate the object placement at get_pages time") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210618132515.163277-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 64 +++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index df46535cca47..c5deb8b7227c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -64,6 +64,33 @@ static struct ttm_placement i915_sys_placement = { .busy_placement = &sys_placement_flags, }; +static int i915_ttm_err_to_gem(int err) +{ + /* Fastpath */ + if (likely(!err)) + return 0; + + switch (err) { + case -EBUSY: + /* + * TTM likes to convert -EDEADLK to -EBUSY, and wants us to + * restart the operation, since we don't record the contending + * lock. We use -EAGAIN to restart. + */ + return -EAGAIN; + case -ENOSPC: + /* + * Memory type / region is full, and we can't evict. + * Except possibly system, that returns -ENOMEM; + */ + return -ENXIO; + default: + break; + } + + return err; +} + static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); static enum ttm_caching @@ -522,15 +549,46 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) struct sg_table *st; struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; struct ttm_placement placement; + int real_num_busy; int ret; GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); /* Move to the requested placement. */ i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + /* + * For now we support LMEM only with TTM. + * TODO: Remove with system support + */ + GEM_BUG_ON(requested.mem_type < I915_PL_LMEM0 || + busy[0].mem_type < I915_PL_LMEM0); + + /* First try only the requested placement. No eviction. */ + real_num_busy = fetch_and_zero(&placement.num_busy_placement); ret = ttm_bo_validate(bo, &placement, &ctx); - if (ret) - return ret == -ENOSPC ? -ENXIO : ret; + if (ret) { + ret = i915_ttm_err_to_gem(ret); + /* + * Anything that wants to restart the operation gets to + * do that. + */ + if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS || + ret == -EAGAIN) + return ret; + + /* TODO: Remove this when we support system as TTM. */ + real_num_busy = 1; + + /* + * If the initial attempt fails, allow all accepted placements, + * evicting if necessary. + */ + placement.num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, &placement, &ctx); + if (ret) + return i915_ttm_err_to_gem(ret); + } /* Object either has a page vector or is an iomem object */ st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; @@ -741,5 +799,5 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, obj->ttm.created = true; /* i915 wants -ENXIO when out of memory region space. */ - return (ret == -ENOSPC) ? -ENXIO : ret; + return i915_ttm_err_to_gem(ret); } -- cgit v1.2.3 From 59bd8ae7d33c83c4a81835d4e922bdae17fd8522 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 18 Jun 2021 14:31:49 +0100 Subject: drm/i915/selftests: add back the selftest() hook for the buddy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we resurrected the selftest we forgot to add back the selftest() hook, meaning the test is not currently run. 
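For background on why a missing selftest() line makes a test silently disappear: the mock selftest header is expanded more than once under different definitions of the selftest() macro to generate the runner's tables, so an entry that is never listed is simply never called. A rough sketch of that x-macro pattern follows; the struct and array names here are invented for illustration and are not the actual definitions in i915_selftest.c.

    /* First expansion: forward-declare every mock test entry point. */
    #define selftest(name, func) int func(void);
    #include "i915_mock_selftests.h"
    #undef selftest

    /* Second expansion: build the table the selftest runner iterates. */
    #define selftest(name, func) { .name = #name, .run = func },
    static const struct mock_selftest_entry {
            const char *name;
            int (*run)(void);
    } mock_selftests[] = {
    #include "i915_mock_selftests.h"
    };
    #undef selftest

Without the selftest(buddy, ...) line restored below, no entry for i915_buddy_mock_selftests() ever lands in that table.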
References: d148738923fd ("drm/i915/ttm Initialize the ttm device and memory managers") Reported-by: kernel test robot Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210618133150.700375-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 34e5caf38093..6759b56086fb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -34,3 +34,4 @@ selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) selftest(contexts, i915_gem_context_mock_selftests) selftest(memory_region, intel_memory_region_mock_selftests) +selftest(buddy, i915_buddy_mock_selftests) -- cgit v1.2.3 From 3e28d37146db5dd49c469bc62a93ca791067d391 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:31 -0700 Subject: drm/i915: Move priolist to new i915_sched_engine object Introduce i915_sched_engine object which is lower level data structure that i915_scheduler / generic code can operate on without touching execlist specific structures. This allows additional submission backends to be added without breaking the layering. Currently the execlists backend uses 1 of these object per each engine (physical or virtual) but future backends like the GuC will point to less instances utilizing the reference counting. This is a bit of detour to integrating the i915 with the DRM scheduler but this object will still exist when the DRM scheduler lands in the i915. It will however look a bit different. It will encapsulate the drm_gpu_scheduler object plus and common variables (to the backends) related to scheduling. Regardless this is a step in the right direction. This patch starts the aforementioned transition by moving the priolist into the i915_sched_engine object. v3: (Jason Ekstrand) Update comment next to intel_engine_cs.virtual Add kernel doc (Checkpatch) Fix double the in commit message v4: (Daniele) Update comment message. Add comment about subclass field Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-2-matthew.brost@intel.com --- Documentation/gpu/i915.rst | 5 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 ++-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 32 ++------- .../gpu/drm/i915/gt/intel_execlists_submission.c | 81 ++++++++++++---------- drivers/gpu/drm/i915/gt/mock_engine.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 19 ++--- drivers/gpu/drm/i915/i915_scheduler.c | 53 ++++++++++---- drivers/gpu/drm/i915/i915_scheduler.h | 18 +++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 47 +++++++++++++ 10 files changed, 192 insertions(+), 90 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 42ce0196930a..1d5ce5676d35 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -425,6 +425,11 @@ User Batchbuffer Execution .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c :doc: User command execution +Scheduling +---------- +.. 
kernel-doc:: drivers/gpu/drm/i915/i915_scheduler_types.h + :functions: i915_sched_engine + Logical Rings, Logical Ring Contexts and Execlists -------------------------------------------------- diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 04c1f5b9ce71..62a28c3c44a8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -585,9 +585,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) memset(execlists->pending, 0, sizeof(execlists->pending)); execlists->active = memset(execlists->inflight, 0, sizeof(execlists->inflight)); - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; } static void cleanup_status_page(struct intel_engine_cs *engine) @@ -714,6 +711,12 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL); + if (!engine->sched_engine) { + err = -ENOMEM; + goto err_sched_engine; + } + err = intel_engine_init_cmd_parser(engine); if (err) goto err_cmd_parser; @@ -737,6 +740,8 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; err_cmd_parser: + i915_sched_engine_put(engine->sched_engine); +err_sched_engine: intel_breadcrumbs_free(engine->breadcrumbs); err_status: cleanup_status_page(engine); @@ -967,6 +972,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) GEM_BUG_ON(!list_empty(&engine->active.requests)); tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ + i915_sched_engine_put(engine->sched_engine); intel_breadcrumbs_free(engine->breadcrumbs); intel_engine_fini_retire(engine); @@ -1253,7 +1259,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + if (!RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)) return false; /* Ring stopped? */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 47f4397095e5..b6a00dd72808 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -275,12 +275,12 @@ static int __engine_park(struct intel_wakeref *wf) intel_breadcrumbs_park(engine->breadcrumbs); /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); - engine->execlists.no_priolist = false; + engine->sched_engine->no_priolist = false; /* While gt calls i915_vma_parked(), we have to break the lock cycle */ intel_gt_pm_put_async(engine->gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e113f93b3274..e41a9c3f9269 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -59,6 +59,7 @@ struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; +struct i915_sched_engine; struct intel_gt; struct intel_ring; struct intel_uncore; @@ -152,11 +153,6 @@ struct intel_engine_execlists { */ struct timer_list preempt; - /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - /** * @ccid: identifier for contexts submitted to this engine */ @@ -191,11 +187,6 @@ struct intel_engine_execlists { */ u32 reset_ccid; - /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - /** * @submit_reg: gen-specific execlist submission register * set to the ExecList Submission Port (elsp) register pre-Gen11 and to @@ -238,23 +229,10 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. - * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. + * @virtual: Queue of requets on a virtual engine, sorted by priority. + * Each RB entry is a struct i915_priolist containing a list of requests + * of the same priority. */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; struct rb_root_cached virtual; /** @@ -332,6 +310,8 @@ struct intel_engine_cs { struct list_head hold; /* ready requests, but on hold */ } active; + struct i915_sched_engine *sched_engine; + /* keep a request in reserve for a [pm] barrier under oom */ struct i915_request *request_pool; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index fc77592d88a9..4f759559a792 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -273,11 +273,11 @@ static int effective_prio(const struct i915_request *rq) return prio; } -static int queue_prio(const struct intel_engine_execlists *execlists) +static int queue_prio(const struct i915_sched_engine *sched_engine) { struct rb_node *rb; - rb = rb_first_cached(&execlists->queue); + rb = rb_first_cached(&sched_engine->queue); if (!rb) return INT_MIN; @@ -318,7 +318,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * to preserve FIFO ordering of dependencies. 
*/ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); - if (engine->execlists.queue_priority_hint <= last_prio) + if (engine->sched_engine->queue_priority_hint <= last_prio) return false; /* @@ -340,7 +340,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * context, it's priority would not exceed ELSP[0] aka last_prio. */ return max(virtual_prio(&engine->execlists), - queue_prio(&engine->execlists)) > last_prio; + queue_prio(engine->sched_engine)) > last_prio; } __maybe_unused static bool @@ -384,7 +384,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) prio = rq_prio(rq); pl = i915_sched_lookup_priolist(engine, prio); } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -1139,7 +1139,7 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, } /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) { + if (!RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)) { ENGINE_TRACE(engine, "timeslice required for queue\n"); return true; } @@ -1236,6 +1236,7 @@ static bool completed(const struct i915_request *rq) static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last, * const *active; @@ -1287,7 +1288,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + sched_engine->queue_priority_hint); record_preemption(execlists); /* @@ -1313,7 +1314,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(timer_expired(&execlists->timer)), last->fence.context, last->fence.seqno, rq_prio(last), - execlists->queue_priority_hint, + sched_engine->queue_priority_hint, yesno(timeslice_yield(execlists, last))); /* @@ -1384,7 +1385,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != &ve->context); - if (unlikely(rq_prio(rq) < queue_prio(execlists))) { + if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { spin_unlock(&ve->base.active.lock); break; } @@ -1405,7 +1406,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(engine != ve->siblings[0])); WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); rb = &ve->nodes[engine->id].rb; rb_erase_cached(rb, &execlists->virtual); @@ -1450,7 +1451,7 @@ unlock: break; } - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -1529,7 +1530,7 @@ unlock: } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: @@ -1551,7 +1552,7 @@ done: * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). 
*/ - execlists->queue_priority_hint = queue_prio(execlists); + sched_engine->queue_priority_hint = queue_prio(sched_engine); spin_unlock(&engine->active.lock); /* @@ -2123,8 +2124,8 @@ static void execlists_unhold(struct intel_engine_cs *engine, */ __execlists_unhold(rq); - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); + if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { + engine->sched_engine->queue_priority_hint = rq_prio(rq); tasklet_hi_schedule(&engine->execlists.tasklet); } @@ -2455,12 +2456,12 @@ static void queue_request(struct intel_engine_cs *engine, static bool submit_queue(struct intel_engine_cs *engine, const struct i915_request *rq) { - struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_sched_engine *sched_engine = engine->sched_engine; - if (rq_prio(rq) <= execlists->queue_priority_hint) + if (rq_prio(rq) <= sched_engine->queue_priority_hint) return false; - execlists->queue_priority_hint = rq_prio(rq); + sched_engine->queue_priority_hint = rq_prio(rq); return true; } @@ -2486,7 +2487,7 @@ static void execlists_submit_request(struct i915_request *request) } else { queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); if (submit_queue(engine, request)) @@ -2969,12 +2970,13 @@ static void nop_submission_tasklet(struct tasklet_struct *t) from_tasklet(engine, t, execlists.tasklet); /* The driver is wedged; don't process any more events. */ - WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -3006,7 +3008,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). 
*/ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -3016,7 +3018,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } @@ -3042,15 +3044,15 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } i915_request_put(rq); - ve->base.execlists.queue_priority_hint = INT_MIN; + ve->base.sched_engine->queue_priority_hint = INT_MIN; } spin_unlock(&ve->base.active.lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.callback = nop_submission_tasklet; @@ -3286,7 +3288,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests; + return &ve->base.sched_engine->default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3344,7 +3346,10 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) lrc_fini(&ve->context); intel_context_fini(&ve->context); - intel_breadcrumbs_free(ve->base.breadcrumbs); + if (ve->base.breadcrumbs) + intel_breadcrumbs_free(ve->base.breadcrumbs); + if (ve->base.sched_engine) + i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); kfree(ve->bonds); @@ -3475,7 +3480,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + mask, ve->base.sched_engine->queue_priority_hint); return mask; } @@ -3484,7 +3489,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) { struct virtual_engine * const ve = from_tasklet(ve, t, base.execlists.tasklet); - const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); + const int prio = READ_ONCE(ve->base.sched_engine->queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -3552,7 +3557,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) + if (first && prio > sibling->sched_engine->queue_priority_hint) tasklet_hi_schedule(&sibling->execlists.tasklet); unlock_engine: @@ -3588,7 +3593,7 @@ static void virtual_submit_request(struct i915_request *rq) i915_request_put(ve->request); } - ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->base.sched_engine->queue_priority_hint = rq_prio(rq); ve->request = i915_request_get(rq); GEM_BUG_ON(!list_empty(virtual_queue(ve))); @@ -3684,6 +3689,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); + ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!ve->base.sched_engine) { + err = -ENOMEM; + goto err_put; + } + ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ -3692,7 +3703,6 @@ 
intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); - ve->base.execlists.queue_priority_hint = INT_MIN; tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet); intel_context_init(&ve->context, &ve->base); @@ -3849,6 +3859,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; + const struct i915_sched_engine *sched_engine = engine->sched_engine; struct i915_request *rq, *last; unsigned long flags; unsigned int count; @@ -3873,13 +3884,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - if (execlists->queue_priority_hint != INT_MIN) + if (sched_engine->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - READ_ONCE(execlists->queue_priority_hint)); + READ_ONCE(sched_engine->queue_priority_hint)); last = NULL; count = 0; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); priolist_for_each_request(rq, p) { diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 32589c6625e1..b1fdba13e900 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -283,6 +283,7 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); + i915_sched_engine_put(engine->sched_engine); intel_breadcrumbs_free(engine->breadcrumbs); intel_context_unpin(engine->kernel_context); @@ -345,6 +346,10 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); + if (!engine->sched_engine) + return -ENOMEM; + intel_engine_init_active(engine, ENGINE_MOCK); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); @@ -352,7 +357,7 @@ int mock_engine_init(struct intel_engine_cs *engine) engine->breadcrumbs = intel_breadcrumbs_create(NULL); if (!engine->breadcrumbs) - return -ENOMEM; + goto err_schedule; ce = create_kernel_context(engine); if (IS_ERR(ce)) @@ -366,6 +371,8 @@ int mock_engine_init(struct intel_engine_cs *engine) err_breadcrumbs: intel_breadcrumbs_free(engine->breadcrumbs); +err_schedule: + i915_sched_engine_put(engine->sched_engine); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7c8ff9792f7b..5c5f33f40055 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -182,6 +182,7 @@ static void schedule_out(struct i915_request *rq) static void __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request **first = execlists->inflight; struct i915_request ** const last_port = first + execlists->port_mask; struct i915_request *last = first[0]; @@ -204,7 +205,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) * event. 
*/ port = first; - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -224,11 +225,11 @@ static void __guc_dequeue(struct intel_engine_cs *engine) last = rq; } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: - execlists->queue_priority_hint = + sched_engine->queue_priority_hint = rb ? to_priolist(rb)->priority : INT_MIN; if (submit) { *port = schedule_in(last, port - execlists->inflight); @@ -338,7 +339,7 @@ out_unlock: static void guc_reset_cancel(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -368,7 +369,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) } /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -378,14 +379,14 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) i915_request_mark_complete(rq); } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -514,7 +515,7 @@ static void guc_submit_request(struct i915_request *rq) queue_request(engine, rq, rq_prio(rq)); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); GEM_BUG_ON(list_empty(&rq->sched.link)); tasklet_hi_schedule(&engine->execlists.tasklet); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index efa638c3acc7..2c31e07883ba 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -40,7 +40,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists) +static void assert_priolists(struct i915_sched_engine * const sched_engine) { struct rb_node *rb; long last_prio; @@ -48,11 +48,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - GEM_BUG_ON(rb_first_cached(&execlists->queue) != - rb_first(&execlists->queue.rb_root)); + GEM_BUG_ON(rb_first_cached(&sched_engine->queue) != + rb_first(&sched_engine->queue.rb_root)); last_prio = INT_MAX; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); GEM_BUG_ON(p->priority > last_prio); @@ -63,21 +63,21 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct 
i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; lockdep_assert_held(&engine->active.lock); - assert_priolists(execlists); + assert_priolists(sched_engine); - if (unlikely(execlists->no_priolist)) + if (unlikely(sched_engine->no_priolist)) prio = I915_PRIORITY_NORMAL; find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - parent = &execlists->queue.rb_root.rb_node; + parent = &sched_engine->queue.rb_root.rb_node; while (*parent) { rb = *parent; p = to_priolist(rb); @@ -92,7 +92,7 @@ find_priolist: } if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; + p = &sched_engine->default_priolist; } else { p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ @@ -107,7 +107,7 @@ find_priolist: * requests, so if userspace lied about their * dependencies that reordering may be visible. */ - execlists->no_priolist = true; + sched_engine->no_priolist = true; goto find_priolist; } } @@ -116,7 +116,7 @@ find_priolist: INIT_LIST_HEAD(&p->requests); rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); + rb_insert_color_cached(&p->node, &sched_engine->queue, first); return &p->requests; } @@ -184,7 +184,7 @@ static void kick_submission(struct intel_engine_cs *engine, * We only need to kick the tasklet once for the high priority * new context we add into the queue. */ - if (prio <= engine->execlists.queue_priority_hint) + if (prio <= engine->sched_engine->queue_priority_hint) return; rcu_read_lock(); @@ -208,7 +208,7 @@ static void kick_submission(struct intel_engine_cs *engine, inflight->fence.context, inflight->fence.seqno, inflight->sched.attr.priority); - engine->execlists.queue_priority_hint = prio; + engine->sched_engine->queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); @@ -489,6 +489,33 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } +void i915_sched_engine_free(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + + kfree(sched_engine); +} + +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass) +{ + struct i915_sched_engine *sched_engine; + + sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL); + if (!sched_engine) + return NULL; + + kref_init(&sched_engine->ref); + + sched_engine->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + + /* subclass is used in a follow up patch */ + + return sched_engine; +} + static void i915_global_scheduler_shrink(void) { kmem_cache_shrink(global.slab_dependencies); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 858a0938f47a..91a04e34cac5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -48,6 +48,24 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass); + +void i915_sched_engine_free(struct kref *kref); + +static inline struct i915_sched_engine * +i915_sched_engine_get(struct i915_sched_engine *sched_engine) +{ + kref_get(&sched_engine->ref); + return sched_engine; +} + +static inline void +i915_sched_engine_put(struct i915_sched_engine *sched_engine) +{ + kref_put(&sched_engine->ref, 
i915_sched_engine_free); +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 343ed44d5ed4..4a7c9f06b40b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -91,4 +91,51 @@ struct i915_dependency { &(rq__)->sched.signalers_list, \ signal_link) +/** + * struct i915_sched_engine - scheduler engine + * + * A schedule engine represents a submission queue with different priority + * bands. It contains all the common state (relative to the backend) to queue, + * track, and submit a request. + * + * This object at the moment is quite i915 specific but will transition into a + * container for the drm_gpu_scheduler plus a few other variables once the i915 + * is integrated with the DRM scheduler. + */ +struct i915_sched_engine { + /** + * @ref: reference count of schedule engine object + */ + struct kref ref; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; +}; + #endif /* _I915_SCHEDULER_TYPES_H_ */ -- cgit v1.2.3 From 074bb195bce1c86b66e5cd809d6663039d3abf42 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:32 -0700 Subject: drm/i915: Add i915_sched_engine_is_empty function Add wrapper function around RB tree to determine if i915_sched_engine is empty. Signed-off-by: Matthew Brost Reviewed-by: Jason Ekstrand Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 +++--- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_scheduler.h | 6 ++++++ 4 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 62a28c3c44a8..506b8f702a7f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1259,7 +1259,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)) + if (!i915_sched_engine_is_empty(engine->sched_engine)) return false; /* Ring stopped? 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4f759559a792..e36b0e81876a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -384,7 +384,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) prio = rq_prio(rq); pl = i915_sched_lookup_priolist(engine, prio); } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -1139,7 +1139,7 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, } /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ - if (!RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)) { + if (!i915_sched_engine_is_empty(engine->sched_engine)) { ENGINE_TRACE(engine, "timeslice required for queue\n"); return true; } @@ -2487,7 +2487,7 @@ static void execlists_submit_request(struct i915_request *request) } else { queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); GEM_BUG_ON(list_empty(&request->sched.link)); if (submit_queue(engine, request)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 5c5f33f40055..d65a7665b38e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -515,7 +515,7 @@ static void guc_submit_request(struct i915_request *rq) queue_request(engine, rq, rq_prio(rq)); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); GEM_BUG_ON(list_empty(&rq->sched.link)); tasklet_hi_schedule(&engine->execlists.tasklet); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 91a04e34cac5..5bec7b3b8456 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -66,6 +66,12 @@ i915_sched_engine_put(struct i915_sched_engine *sched_engine) kref_put(&sched_engine->ref, i915_sched_engine_free); } +static inline bool +i915_sched_engine_is_empty(struct i915_sched_engine *sched_engine) +{ + return RB_EMPTY_ROOT(&sched_engine->queue.rb_root); +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, -- cgit v1.2.3 From c4fd7d8cc3caa614ab492e0efc8854328f72b719 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:33 -0700 Subject: drm/i915: Reset sched_engine.no_priolist immediately after dequeue Rather than touching schedule state in the generic PM code, reset the priolist allocation when empty in the submission code. Add a wrapper function to do this and update the backends to call it in the correct place. 
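As a rough illustration (not part of the patch itself), the idea is that the submission path clears the priolist-disabled flag the moment it observes an empty queue, instead of the PM park hook doing it. The helper below mirrors the i915_sched_engine_reset_on_empty() added by this patch; backend_dequeue_example() is a made-up stand-in for the execlists/GuC dequeue tasklets and assumes the i915_sched_engine definition introduced earlier in this series.

/* Sketch only: clear the priolist-disabled flag as soon as the queue drains. */
static inline void
sched_engine_reset_on_empty_example(struct i915_sched_engine *sched_engine)
{
	/* An empty rbtree means no pending requests, so priolists may be reused. */
	if (RB_EMPTY_ROOT(&sched_engine->queue.rb_root))
		sched_engine->no_priolist = false;
}

/* Hypothetical caller: each backend invokes the reset at the end of its dequeue, under its submission lock. */
static void backend_dequeue_example(struct i915_sched_engine *sched_engine)
{
	/* ... pop requests from sched_engine->queue and submit them to the hardware ... */
	sched_engine_reset_on_empty_example(sched_engine);
}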
v3: (Jason Ekstrand) Update patch commit message with a better description Signed-off-by: Matthew Brost Reviewed-by: Jason Ekstrand Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 -- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 ++ drivers/gpu/drm/i915/i915_scheduler.h | 7 +++++++ 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b6a00dd72808..1f07ac4e0672 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -280,8 +280,6 @@ static int __engine_park(struct intel_wakeref *wf) if (engine->park) engine->park(engine); - engine->sched_engine->no_priolist = false; - /* While gt calls i915_vma_parked(), we have to break the lock cycle */ intel_gt_pm_put_async(engine->gt); return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e36b0e81876a..47a43aafa39f 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1553,6 +1553,7 @@ done: * interrupt for secondary ports). */ sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); spin_unlock(&engine->active.lock); /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d65a7665b38e..9887a514a4d5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -263,6 +263,8 @@ static void guc_submission_tasklet(struct tasklet_struct *t) __guc_dequeue(engine); + i915_sched_engine_reset_on_empty(engine->sched_engine); + spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 5bec7b3b8456..713c38c99de9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -72,6 +72,13 @@ i915_sched_engine_is_empty(struct i915_sched_engine *sched_engine) return RB_EMPTY_ROOT(&sched_engine->queue.rb_root); } +static inline void +i915_sched_engine_reset_on_empty(struct i915_sched_engine *sched_engine) +{ + if (i915_sched_engine_is_empty(sched_engine)) + sched_engine->no_priolist = false; +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, -- cgit v1.2.3 From 349a2bc5aae45f54bce1c6fd54d8d3ac2ae26611 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:34 -0700 Subject: drm/i915: Move active tracking to i915_sched_engine Move active request tracking and its lock to i915_sched_engine. This lock is also the submission lock so having it in the i915_sched_engine is the correct place. 
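As a condensed illustration (not part of the patch), the locking pattern every backend converts to in the diff below is sketched here: in-flight requests are reached through engine->sched_engine rather than the old engine->active fields. walk_inflight_requests_example() is a hypothetical caller modelled on the reset/cancel paths touched by this patch.

/* Sketch only: in-flight requests now live behind the sched_engine lock. */
static void walk_inflight_requests_example(struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	unsigned long flags;

	spin_lock_irqsave(&engine->sched_engine->lock, flags);
	list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) {
		/* inspect, capture or cancel the executing request here */
	}
	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}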
v3: (Jason Ekstrand) Add kernel doc v6: Rebase Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-5-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 - drivers/gpu/drm/i915/gt/intel_engine_cs.c | 43 +++------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 6 -- .../gpu/drm/i915/gt/intel_execlists_submission.c | 98 +++++++++++----------- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 12 +-- drivers/gpu/drm/i915/gt/mock_engine.c | 7 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 20 ++--- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 32 +++---- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 30 +++++-- drivers/gpu/drm/i915/i915_scheduler_types.h | 16 ++++ 13 files changed, 140 insertions(+), 136 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 62f7440bc111..45ee08fc40a1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -269,8 +269,6 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, void intel_engine_destroy_pinned_context(struct intel_context *ce); -void intel_engine_init_active(struct intel_engine_cs *engine, - unsigned int subclass); #define ENGINE_PHYSICAL 0 #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 506b8f702a7f..530bd3baf9d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -721,7 +721,6 @@ static int engine_setup_common(struct intel_engine_cs *engine) if (err) goto err_cmd_parser; - intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); @@ -780,11 +779,11 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->rq.ring = &frame->ring; mutex_lock(&ce->timeline->mutex); - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); mutex_unlock(&ce->timeline->mutex); GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ @@ -793,28 +792,6 @@ static int measure_breadcrumb_dw(struct intel_context *ce) return dw; } -void -intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) -{ - INIT_LIST_HEAD(&engine->active.requests); - INIT_LIST_HEAD(&engine->active.hold); - - spin_lock_init(&engine->active.lock); - lockdep_set_subclass(&engine->active.lock, subclass); - - /* - * Due to an interesting quirk in lockdep's internal debug tracking, - * after setting a subclass we must ensure the lock is used. Otherwise, - * nr_unused_locks is incremented once too often. 
- */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - local_irq_disable(); - lock_map_acquire(&engine->active.lock.dep_map); - lock_map_release(&engine->active.lock.dep_map); - local_irq_enable(); -#endif -} - struct intel_context * intel_engine_create_pinned_context(struct intel_engine_cs *engine, struct i915_address_space *vm, @@ -969,7 +946,7 @@ int intel_engines_init(struct intel_gt *gt) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - GEM_BUG_ON(!list_empty(&engine->active.requests)); + GEM_BUG_ON(!list_empty(&engine->sched_engine->requests)); tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ i915_sched_engine_put(engine->sched_engine); @@ -1325,7 +1302,7 @@ static struct intel_timeline *get_timeline(struct i915_request *rq) struct intel_timeline *tl; /* - * Even though we are holding the engine->active.lock here, there + * Even though we are holding the engine->sched_engine->lock here, there * is no control over the submission queue per-se and we are * inspecting the active state at a random point in time, with an * unknown queue. Play safe and make sure the timeline remains valid. @@ -1672,7 +1649,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { struct intel_timeline *tl = get_timeline(rq); @@ -1703,8 +1680,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } - drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold)); - spin_unlock_irqrestore(&engine->active.lock, flags); + drm_printf(m, "\tOn hold?: %lu\n", + list_count(&engine->sched_engine->hold)); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); @@ -1784,7 +1762,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. 
*/ - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); rcu_read_lock(); request = execlists_active(&engine->execlists); @@ -1802,7 +1780,8 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) if (active) return active; - list_for_each_entry(request, &engine->active.requests, sched.link) { + list_for_each_entry(request, &engine->sched_engine->requests, + sched.link) { if (__i915_request_is_complete(request)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e41a9c3f9269..5e0f39d202ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -304,12 +304,6 @@ struct intel_engine_cs { struct intel_sseu sseu; - struct { - spinlock_t lock; - struct list_head requests; - struct list_head hold; /* ready requests, but on hold */ - } active; - struct i915_sched_engine *sched_engine; /* keep a request in reserve for a [pm] barrier under oom */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 47a43aafa39f..0591698573e8 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -325,7 +325,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * Check against the first request in ELSP[1], it will, thanks to the * power of PI, be the highest priority of that context. */ - if (!list_is_last(&rq->sched.link, &engine->active.requests) && + if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) && rq_prio(list_next_entry(rq, sched.link)) > last_prio) return true; @@ -367,10 +367,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct list_head *pl; int prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->active.requests, + &engine->sched_engine->requests, sched.link) { if (__i915_request_is_complete(rq)) { list_del_init(&rq->sched.link); @@ -534,13 +534,13 @@ resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) { struct intel_engine_cs *engine = rq->engine; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); WRITE_ONCE(rq->engine, &ve->base); ve->base.submit_request(rq); - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); } static void kick_siblings(struct i915_request *rq, struct intel_context *ce) @@ -579,7 +579,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, unsigned int ccid; /* - * NB process_csb() is not under the engine->active.lock and hence + * NB process_csb() is not under the engine->sched_engine->lock and hence * schedule_out can race with schedule_in meaning that we should * refrain from doing non-trivial work here. 
*/ @@ -1133,7 +1133,8 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, return false; /* If ELSP[1] is occupied, always check to see if worth slicing */ - if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) { + if (!list_is_last_rcu(&rq->sched.link, + &engine->sched_engine->requests)) { ENGINE_TRACE(engine, "timeslice required for second inflight context\n"); return true; } @@ -1266,7 +1267,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock(&engine->active.lock); + spin_lock(&sched_engine->lock); /* * If the queue is higher priority than the last @@ -1366,7 +1367,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - spin_unlock(&engine->active.lock); + spin_unlock(&sched_engine->lock); return; } } @@ -1376,7 +1377,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) while ((ve = first_virtual_engine(engine))) { struct i915_request *rq; - spin_lock(&ve->base.active.lock); + spin_lock(&ve->base.sched_engine->lock); rq = ve->request; if (unlikely(!virtual_matches(ve, rq, engine))) @@ -1386,13 +1387,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->context != &ve->context); if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); break; } if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); - spin_unlock(&engine->active.lock); + spin_unlock(&ve->base.sched_engine->lock); + spin_unlock(&engine->sched_engine->lock); return; /* leave this for another sibling */ } @@ -1438,7 +1439,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) i915_request_put(rq); unlock: - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); /* * Hmm, we have a bunch of virtual engine requests, @@ -1554,7 +1555,7 @@ done: */ sched_engine->queue_priority_hint = queue_prio(sched_engine); i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock(&engine->active.lock); + spin_unlock(&sched_engine->lock); /* * We can skip poking the HW if we ended up with exactly the same set @@ -1981,7 +1982,8 @@ static void __execlists_hold(struct i915_request *rq) __i915_request_unsubmit(rq); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - list_move_tail(&rq->sched.link, &rq->engine->active.hold); + list_move_tail(&rq->sched.link, + &rq->engine->sched_engine->hold); i915_request_set_hold(rq); RQ_TRACE(rq, "on hold\n"); @@ -2018,7 +2020,7 @@ static bool execlists_hold(struct intel_engine_cs *engine, if (i915_request_on_hold(rq)) return false; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); if (__i915_request_is_complete(rq)) { /* too late! 
*/ rq = NULL; @@ -2034,10 +2036,10 @@ static bool execlists_hold(struct intel_engine_cs *engine, GEM_BUG_ON(i915_request_on_hold(rq)); GEM_BUG_ON(rq->engine != engine); __execlists_hold(rq); - GEM_BUG_ON(list_empty(&engine->active.hold)); + GEM_BUG_ON(list_empty(&engine->sched_engine->hold)); unlock: - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); return rq; } @@ -2117,7 +2119,7 @@ static void __execlists_unhold(struct i915_request *rq) static void execlists_unhold(struct intel_engine_cs *engine, struct i915_request *rq) { - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); /* * Move this request back to the priority queue, and all of its @@ -2130,7 +2132,7 @@ static void execlists_unhold(struct intel_engine_cs *engine, tasklet_hi_schedule(&engine->execlists.tasklet); } - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); } struct execlists_capture { @@ -2260,13 +2262,13 @@ static void execlists_capture(struct intel_engine_cs *engine) if (!cap) return; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); cap->rq = active_context(engine, active_ccid(engine)); if (cap->rq) { cap->rq = active_request(cap->rq->context->timeline, cap->rq); cap->rq = i915_request_get_rcu(cap->rq); } - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); if (!cap->rq) goto err_free; @@ -2470,7 +2472,7 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, const struct i915_request *rq) { GEM_BUG_ON(i915_request_on_hold(rq)); - return !list_empty(&engine->active.hold) && hold_request(rq); + return !list_empty(&engine->sched_engine->hold) && hold_request(rq); } static void execlists_submit_request(struct i915_request *request) @@ -2479,11 +2481,12 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); if (unlikely(ancestor_on_hold(engine, request))) { RQ_TRACE(request, "ancestor on hold\n"); - list_add_tail(&request->sched.link, &engine->active.hold); + list_add_tail(&request->sched.link, + &engine->sched_engine->hold); i915_request_set_hold(request); } else { queue_request(engine, request); @@ -2495,7 +2498,7 @@ static void execlists_submit_request(struct i915_request *request) __execlists_kick(&engine->execlists); } - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static int @@ -2959,9 +2962,9 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) /* Push back any incomplete requests for replay after the reset. */ rcu_read_lock(); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __unwind_incomplete_requests(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); } @@ -3001,10 +3004,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) execlists_reset_csb(engine, true); rcu_read_lock(); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all executing requests as skipped. 
*/ - list_for_each_entry(rq, &engine->active.requests, sched.link) + list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); @@ -3024,7 +3027,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } /* On-hold requests will be flushed to timeline upon their release */ - list_for_each_entry(rq, &engine->active.hold, sched.link) + list_for_each_entry(rq, &sched_engine->hold, sched.link) i915_request_put(i915_request_mark_eio(rq)); /* Cancel all attached virtual engines */ @@ -3035,7 +3038,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); - spin_lock(&ve->base.active.lock); + spin_lock(&ve->base.sched_engine->lock); rq = fetch_and_zero(&ve->request); if (rq) { if (i915_request_mark_eio(rq)) { @@ -3047,7 +3050,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) ve->base.sched_engine->queue_priority_hint = INT_MIN; } - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -3058,7 +3061,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.callback = nop_submission_tasklet; - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); } @@ -3304,7 +3307,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) if (unlikely(ve->request)) { struct i915_request *old; - spin_lock_irq(&ve->base.active.lock); + spin_lock_irq(&ve->base.sched_engine->lock); old = fetch_and_zero(&ve->request); if (old) { @@ -3313,7 +3316,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) i915_request_put(old); } - spin_unlock_irq(&ve->base.active.lock); + spin_unlock_irq(&ve->base.sched_engine->lock); } /* @@ -3333,13 +3336,13 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->active.lock); + spin_lock_irq(&sibling->sched_engine->lock); /* Detachment is lazily performed in the execlists tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irq(&sibling->sched_engine->lock); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); GEM_BUG_ON(!list_empty(virtual_queue(ve))); @@ -3509,7 +3512,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) if (!READ_ONCE(ve->request)) break; /* already handled by a sibling's tasklet */ - spin_lock_irq(&sibling->active.lock); + spin_lock_irq(&sibling->sched_engine->lock); if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { @@ -3562,7 +3565,7 @@ submit_engine: tasklet_hi_schedule(&sibling->execlists.tasklet); unlock_engine: - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irq(&sibling->sched_engine->lock); if (intel_context_inflight(&ve->context)) break; @@ -3580,7 +3583,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - spin_lock_irqsave(&ve->base.active.lock, flags); + spin_lock_irqsave(&ve->base.sched_engine->lock, flags); /* By the time we resubmit a request, it may be completed */ if (__i915_request_is_complete(rq)) { @@ -3603,7 +3606,7 @@ static void virtual_submit_request(struct 
i915_request *rq) tasklet_hi_schedule(&ve->base.execlists.tasklet); unlock: - spin_unlock_irqrestore(&ve->base.active.lock, flags); + spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } static struct ve_bond * @@ -3687,7 +3690,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); - intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); @@ -3860,17 +3862,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; - const struct i915_sched_engine *sched_engine = engine->sched_engine; + struct i915_sched_engine *sched_engine = engine->sched_engine; struct i915_request *rq, *last; unsigned long flags; unsigned int count; struct rb_node *rb; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); last = NULL; count = 0; - list_for_each_entry(rq, &engine->active.requests, sched.link) { + list_for_each_entry(rq, &sched_engine->requests, sched.link) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else @@ -3933,7 +3935,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 0c423f096e2b..5d42a12ef3d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -339,9 +339,9 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) u32 head; rq = NULL; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); rcu_read_lock(); - list_for_each_entry(pos, &engine->active.requests, sched.link) { + list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) { if (!__i915_request_is_complete(pos)) { rq = pos; break; @@ -396,7 +396,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) } engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -408,16 +408,16 @@ static void reset_cancel(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all submitted requests as skipped. 
*/ - list_for_each_entry(request, &engine->active.requests, sched.link) + list_for_each_entry(request, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(request)); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void i9xx_submit_request(struct i915_request *request) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index b1fdba13e900..a49fd3039f13 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -253,10 +253,10 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) del_timer_sync(&mock->hw_delay); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) + list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); @@ -269,7 +269,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&mock->hw_queue); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void mock_reset_finish(struct intel_engine_cs *engine) @@ -350,7 +350,6 @@ int mock_engine_init(struct intel_engine_cs *engine) if (!engine->sched_engine) return -ENOMEM; - intel_engine_init_active(engine, ENGINE_MOCK); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 1c8108d30b85..da694bad4683 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -4610,9 +4610,9 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Fake a preemption event; failed of course */ - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); GEM_BUG_ON(rq->engine != engine); /* Reset the engine while keeping our active request on hold */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9887a514a4d5..6b8f6544d91a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -190,7 +190,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&sched_engine->lock); if (last) { if (*++first) @@ -247,7 +247,7 @@ static void guc_submission_tasklet(struct tasklet_struct *t) struct i915_request **port, *rq; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); for (port = execlists->inflight; (rq = *port); port++) { if (!i915_request_completed(rq)) @@ -265,7 +265,7 @@ static void guc_submission_tasklet(struct tasklet_struct *t) i915_sched_engine_reset_on_empty(engine->sched_engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + 
spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) @@ -322,7 +322,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) struct i915_request *rq; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Push back any incomplete requests for replay after the reset. */ rq = execlists_unwind_incomplete_requests(execlists); @@ -336,7 +336,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) guc_reset_state(rq->context, engine, rq->head, stalled); out_unlock: - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void guc_reset_cancel(struct intel_engine_cs *engine) @@ -362,10 +362,10 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { + list_for_each_entry(rq, &sched_engine->requests, sched.link) { i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); } @@ -390,7 +390,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) sched_engine->queue_priority_hint = INT_MIN; sched_engine->queue = RB_ROOT_CACHED; - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } static void guc_reset_finish(struct intel_engine_cs *engine) @@ -513,7 +513,7 @@ static void guc_submit_request(struct i915_request *rq) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); queue_request(engine, rq, rq_prio(rq)); @@ -522,7 +522,7 @@ static void guc_submit_request(struct i915_request *rq) tasklet_hi_schedule(&engine->execlists.tasklet); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void sanitize_hwsp(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 35c97c39f125..cb182c6d265a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1436,12 +1436,12 @@ capture_engine(struct intel_engine_cs *engine, if (!ee) return NULL; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); rq = intel_engine_find_active_request(engine); if (rq) capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); if (!capture) { kfree(ee); return NULL; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1014c71cf7f5..bd58198735dc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -272,11 +272,11 @@ i915_request_active_engine(struct i915_request *rq, * check that we have acquired the lock on the final engine. 
*/ locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); + spin_lock_irq(&locked->sched_engine->lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); + spin_unlock(&locked->sched_engine->lock); locked = engine; - spin_lock(&locked->active.lock); + spin_lock(&locked->sched_engine->lock); } if (i915_request_is_active(rq)) { @@ -285,7 +285,7 @@ i915_request_active_engine(struct i915_request *rq, ret = true; } - spin_unlock_irq(&locked->active.lock); + spin_unlock_irq(&locked->sched_engine->lock); return ret; } @@ -302,10 +302,10 @@ static void remove_from_engine(struct i915_request *rq) * check that the rq still belongs to the newly locked engine. */ locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); + spin_lock_irq(&locked->sched_engine->lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); locked = engine; } list_del_init(&rq->sched.link); @@ -316,7 +316,7 @@ static void remove_from_engine(struct i915_request *rq) /* Prevent further __await_execution() registering a cb, then flush */ set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - spin_unlock_irq(&locked->active.lock); + spin_unlock_irq(&locked->sched_engine->lock); __notify_execute_cb_imm(rq); } @@ -637,7 +637,7 @@ bool __i915_request_submit(struct i915_request *request) RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* * With the advent of preempt-to-busy, we frequently encounter @@ -649,7 +649,7 @@ bool __i915_request_submit(struct i915_request *request) * * We must remove the request from the caller's priority queue, * and the caller must only call us when the request is in their - * priority queue, under the active.lock. This ensures that the + * priority queue, under the sched_engine->lock. This ensures that the * request has *not* yet been retired and we can safely move * the request into the engine->active.list where it will be * dropped upon retiring. (Otherwise if resubmit a *retired* @@ -694,7 +694,7 @@ bool __i915_request_submit(struct i915_request *request) result = true; GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - list_move_tail(&request->sched.link, &engine->active.requests); + list_move_tail(&request->sched.link, &engine->sched_engine->requests); active: clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); @@ -724,11 +724,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -742,7 +742,7 @@ void __i915_request_unsubmit(struct i915_request *request) RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* * Before we remove this breadcrumb from the signal list, we have @@ -775,11 +775,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void __cancel_request(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 270f6cd37650..239964bec1fa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -613,7 +613,7 @@ i915_request_active_timeline(const struct i915_request *rq) * this submission. */ return rcu_dereference_protected(rq->timeline, - lockdep_is_held(&rq->engine->active.lock)); + lockdep_is_held(&rq->engine->sched_engine->lock)); } static inline u32 diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2c31e07883ba..4bc6969f6a97 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -68,7 +68,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) struct rb_node **parent, *rb; bool first = true; - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); assert_priolists(sched_engine); if (unlikely(sched_engine->no_priolist)) @@ -147,9 +147,9 @@ sched_lock_engine(const struct i915_sched_node *node, * check that the rq still belongs to the newly locked engine. 
*/ while (locked != (engine = READ_ONCE(rq->engine))) { - spin_unlock(&locked->active.lock); + spin_unlock(&locked->sched_engine->lock); memset(cache, 0, sizeof(*cache)); - spin_lock(&engine->active.lock); + spin_lock(&engine->sched_engine->lock); locked = engine; } @@ -296,7 +296,7 @@ static void __i915_schedule(struct i915_sched_node *node, memset(&cache, 0, sizeof(cache)); engine = node_to_request(node)->engine; - spin_lock(&engine->active.lock); + spin_lock(&engine->sched_engine->lock); /* Fifo and depth-first replacement ensure our deps execute before us */ engine = sched_lock_engine(node, engine, &cache); @@ -305,7 +305,7 @@ static void __i915_schedule(struct i915_sched_node *node, node = dep->signaler; engine = sched_lock_engine(node, engine, &cache); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) @@ -338,7 +338,7 @@ static void __i915_schedule(struct i915_sched_node *node, kick_submission(engine, node_to_request(node), prio); } - spin_unlock(&engine->active.lock); + spin_unlock(&engine->sched_engine->lock); } void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) @@ -511,7 +511,23 @@ i915_sched_engine_create(unsigned int subclass) sched_engine->queue = RB_ROOT_CACHED; sched_engine->queue_priority_hint = INT_MIN; - /* subclass is used in a follow up patch */ + INIT_LIST_HEAD(&sched_engine->requests); + INIT_LIST_HEAD(&sched_engine->hold); + + spin_lock_init(&sched_engine->lock); + lockdep_set_subclass(&sched_engine->lock, subclass); + + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + local_irq_disable(); + lock_map_acquire(&sched_engine->lock.dep_map); + lock_map_release(&sched_engine->lock.dep_map); + local_irq_enable(); +#endif return sched_engine; } diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 4a7c9f06b40b..74e3bb2b2a65 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -108,6 +108,22 @@ struct i915_sched_engine { */ struct kref ref; + /** + * @lock: protects requests in priority lists, requests, hold and + * tasklet while running + */ + spinlock_t lock; + + /** + * @requests: list of requests inflight on this schedule engine + */ + struct list_head requests; + + /** + * @hold: list of ready requests, but on hold + */ + struct list_head hold; + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ -- cgit v1.2.3 From 3f623e06cd56573d57660ce02d63aaf0a09d3fbb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:35 -0700 Subject: drm/i915: Move engine->schedule to i915_sched_engine The schedule function should be in the schedule object. 
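As a minimal illustration (not part of the patch), callers now reach the priority-adjustment hook through the scheduler object instead of the engine. bump_priority_example() is a hypothetical caller modelled on the fence_set_priority() and heartbeat changes in the diff below.

/* Sketch only: the schedule() hook hangs off the sched_engine and may be NULL for in-order backends. */
static void bump_priority_example(struct i915_request *rq, int prio)
{
	struct i915_sched_attr attr = { .priority = prio };

	if (rq->engine->sched_engine->schedule)
		rq->engine->sched_engine->schedule(rq, &attr);
}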
v3: (Jason Ekstrand) Add kernel doc Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 -------- drivers/gpu/drm/i915/gt/intel_engine_user.c | 2 +- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 16 ++++++++-------- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 10 +++++----- drivers/gpu/drm/i915/i915_scheduler_types.h | 10 ++++++++++ 11 files changed, 33 insertions(+), 34 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1e97520c62b2..1070d3afdce7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -104,8 +104,8 @@ static void fence_set_priority(struct dma_fence *fence, engine = rq->engine; rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) - engine->schedule(rq, attr); + if (engine->sched_engine->schedule) + engine->sched_engine->schedule(rq, attr); rcu_read_unlock(); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 530bd3baf9d7..8e0a8ee955af 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -328,9 +328,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) if (engine->context_size) DRIVER_CAPS(i915)->has_logical_contexts = true; - /* Nothing to do here, execute in order of dependencies */ - engine->schedule = NULL; - ewma__engine_latency_init(&engine->latency); seqcount_init(&engine->stats.lock); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index b99ac41695f3..b6a305e6a974 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -121,7 +121,7 @@ static void heartbeat(struct work_struct *wrk) * but all other contexts, including the kernel * context are stuck waiting for the signal. */ - } else if (engine->schedule && + } else if (engine->sched_engine->schedule && rq->sched.attr.priority < I915_PRIORITY_BARRIER) { /* * Gradually raise the priority of the heartbeat to @@ -136,7 +136,7 @@ static void heartbeat(struct work_struct *wrk) attr.priority = I915_PRIORITY_BARRIER; local_bh_disable(); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); local_bh_enable(); } else { if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 5e0f39d202ef..0bb65c57d274 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -428,14 +428,6 @@ struct intel_engine_cs { void (*bond_execute)(struct i915_request *rq, struct dma_fence *signal); - /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. Note the request itself may - * not be ready to run! 
- */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - void (*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 3cca7ea2d6ea..84142127ebd8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -108,7 +108,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915) for_each_uabi_engine(engine, i915) { /* all engines must agree! */ int i; - if (engine->schedule) + if (engine->sched_engine->schedule) enabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); else diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 0591698573e8..8a3d4014fd2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3119,7 +3119,7 @@ static bool can_preempt(struct intel_engine_cs *engine) static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->schedule = i915_schedule; + engine->sched_engine->schedule = i915_schedule; engine->execlists.tasklet.callback = execlists_submission_tasklet; } @@ -3701,7 +3701,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; - ve->base.schedule = i915_schedule; + ve->base.sched_engine->schedule = i915_schedule; ve->base.submit_request = virtual_submit_request; ve->base.bond_execute = virtual_bond_execute; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index da694bad4683..72f8cc914f4f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -273,7 +273,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) }; /* Alternatively preempt the spinner with ce[1] */ - engine->schedule(rq[1], &attr); + engine->sched_engine->schedule(rq[1], &attr); } /* And switch back to ce[0] for good measure */ @@ -917,7 +917,7 @@ release_queue(struct intel_engine_cs *engine, i915_request_add(rq); local_bh_disable(); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); local_bh_enable(); /* kick tasklet */ i915_request_put(rq); @@ -1342,7 +1342,7 @@ static int live_timeslice_queue(void *arg) err = PTR_ERR(rq); goto err_heartbeat; } - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); err = wait_for_submit(engine, rq, HZ / 2); if (err) { pr_err("%s: Timed out trying to submit semaphores\n", @@ -1884,7 +1884,7 @@ static int live_late_preempt(void *arg) } attr.priority = I915_PRIORITY_MAX; - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); if (!igt_wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); @@ -2497,7 +2497,7 @@ static int live_suppress_self_preempt(void *arg) i915_request_add(rq_b); GEM_BUG_ON(i915_request_completed(rq_a)); - engine->schedule(rq_a, &attr); + engine->sched_engine->schedule(rq_a, &attr); igt_spinner_end(&a.spin); if (!igt_wait_for_spinner(&b.spin, rq_b)) { @@ -2629,7 +2629,7 @@ static int live_chain_preempt(void *arg) i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); 
igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -2988,7 +2988,7 @@ static int live_preempt_gang(void *arg) break; /* Submit each spinner at increasing priority */ - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); } while (prio <= I915_PRIORITY_MAX && !__igt_timeout(end_time, NULL)); pr_debug("%s: Preempt chain of %d requests\n", @@ -3236,7 +3236,7 @@ static int preempt_user(struct intel_engine_cs *engine, i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); if (i915_request_wait(rq, 0, HZ / 2) < 0) err = -ETIME; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 853246fad05f..cec4b9977c9b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -858,12 +858,12 @@ static int active_engine(void *data) rq[idx] = i915_request_get(new); i915_request_add(new); - if (engine->schedule && arg->flags & TEST_PRIORITY) { + if (engine->sched_engine->schedule && arg->flags & TEST_PRIORITY) { struct i915_sched_attr attr = { .priority = i915_prandom_u32_max_state(512, &prng), }; - engine->schedule(rq[idx], &attr); + engine->sched_engine->schedule(rq[idx], &attr); } err = active_request_put(old); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 6b8f6544d91a..60121809e6e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -615,7 +615,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &guc_context_ops; engine->request_alloc = guc_request_alloc; - engine->schedule = i915_schedule; + engine->sched_engine->schedule = i915_schedule; engine->reset.prepare = guc_reset_prepare; engine->reset.rewind = guc_reset_rewind; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index bd58198735dc..c5989c0b83d3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1343,7 +1343,7 @@ __i915_request_await_execution(struct i915_request *to, } /* Couple the dependency tree for PI on this exposed to->fence */ - if (to->engine->schedule) { + if (to->engine->sched_engine->schedule) { err = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_WEAK); @@ -1484,7 +1484,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; } - if (to->engine->schedule) { + if (to->engine->sched_engine->schedule) { ret = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_EXTERNAL); @@ -1671,7 +1671,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) __i915_sw_fence_await_dma_fence(&rq->submit, &prev->fence, &rq->dmaq); - if (rq->engine->schedule) + if (rq->engine->sched_engine->schedule) __i915_sched_node_add_dependency(&rq->sched, &prev->sched, &rq->dep, @@ -1743,8 +1743,8 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. 
*/ - if (attr && rq->engine->schedule) - rq->engine->schedule(rq, attr); + if (attr && rq->engine->sched_engine->schedule) + rq->engine->sched_engine->schedule(rq, attr); local_bh_disable(); __i915_request_queue_bh(rq); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 74e3bb2b2a65..0c1e417b0164 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -152,6 +152,16 @@ struct i915_sched_engine { * @no_priolist: priority lists disabled */ bool no_priolist; + + /** + * @schedule: adjust priority of request + * + * Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. Note the request itself may + * not be ready to run! + */ + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); }; #endif /* _I915_SCHEDULER_TYPES_H_ */ -- cgit v1.2.3 From 71ed60112d5d3bc90df704c1db2b655a9f4a7b66 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:36 -0700 Subject: drm/i915: Add kick_backend function to i915_sched_engine Not all back-ends require a kick after a scheduling update, so make the kick a call-back function that the back-end can opt-in to. Also move the current kick function from the scheduler to the execlists file as it is specific to that back-end. Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-7-matthew.brost@intel.com --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 52 ++++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.c | 62 +--------------------- drivers/gpu/drm/i915/i915_scheduler_types.h | 6 +++ 3 files changed, 60 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 8a3d4014fd2c..9487d9e0be62 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3116,10 +3116,61 @@ static bool can_preempt(struct intel_engine_cs *engine) return engine->class != RENDER_CLASS; } +static void kick_execlists(const struct i915_request *rq, int prio) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_sched_engine *sched_engine = engine->sched_engine; + const struct i915_request *inflight; + + /* + * We only need to kick the tasklet once for the high priority + * new context we add into the queue. + */ + if (prio <= sched_engine->queue_priority_hint) + return; + + rcu_read_lock(); + + /* Nothing currently active? We're overdue for a submission! */ + inflight = execlists_active(&engine->execlists); + if (!inflight) + goto unlock; + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves. + */ + if (inflight->context == rq->context) + goto unlock; + + ENGINE_TRACE(engine, + "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", + prio, + rq->fence.context, rq->fence.seqno, + inflight->fence.context, inflight->fence.seqno, + inflight->sched.attr.priority); + + sched_engine->queue_priority_hint = prio; + + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. 
+ */ + if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) + tasklet_hi_schedule(&engine->execlists.tasklet); + +unlock: + rcu_read_unlock(); +} + static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; engine->sched_engine->schedule = i915_schedule; + engine->sched_engine->kick_backend = kick_execlists; engine->execlists.tasklet.callback = execlists_submission_tasklet; } @@ -3702,6 +3753,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.request_alloc = execlists_request_alloc; ve->base.sched_engine->schedule = i915_schedule; + ve->base.sched_engine->kick_backend = kick_execlists; ve->base.submit_request = virtual_submit_request; ve->base.bond_execute = virtual_bond_execute; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 4bc6969f6a97..035b88f2e4aa 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -157,65 +157,6 @@ sched_lock_engine(const struct i915_sched_node *node, return locked; } -static inline int rq_prio(const struct i915_request *rq) -{ - return rq->sched.attr.priority; -} - -static inline bool need_preempt(int prio, int active) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - */ - return prio >= max(I915_PRIORITY_NORMAL, active); -} - -static void kick_submission(struct intel_engine_cs *engine, - const struct i915_request *rq, - int prio) -{ - const struct i915_request *inflight; - - /* - * We only need to kick the tasklet once for the high priority - * new context we add into the queue. - */ - if (prio <= engine->sched_engine->queue_priority_hint) - return; - - rcu_read_lock(); - - /* Nothing currently active? We're overdue for a submission! */ - inflight = execlists_active(&engine->execlists); - if (!inflight) - goto unlock; - - /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. - */ - if (inflight->context == rq->context) - goto unlock; - - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); - - engine->sched_engine->queue_priority_hint = prio; - if (need_preempt(prio, rq_prio(inflight))) - tasklet_hi_schedule(&engine->execlists.tasklet); - -unlock: - rcu_read_unlock(); -} - static void __i915_schedule(struct i915_sched_node *node, const struct i915_sched_attr *attr) { @@ -335,7 +276,8 @@ static void __i915_schedule(struct i915_sched_node *node, } /* Defer (tasklet) submission until after all of our updates. 
*/ - kick_submission(engine, node_to_request(node), prio); + if (engine->sched_engine->kick_backend) + engine->sched_engine->kick_backend(node_to_request(node), prio); } spin_unlock(&engine->sched_engine->lock); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 0c1e417b0164..8bd07d0c27e1 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -153,6 +153,12 @@ struct i915_sched_engine { */ bool no_priolist; + + /** + * @kick_backend: kick backend after a request's priority has changed + */ + void (*kick_backend)(const struct i915_request *rq, + int prio); + /** * @schedule: adjust priority of request * -- cgit v1.2.3 From d2a31d026492040f5401f667cd3201b16c656b00 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:37 -0700 Subject: drm/i915: Update i915_scheduler to operate on i915_sched_engine Rather than passing around an intel_engine_cs in the scheduling code, pass around an i915_sched_engine. v3: (Jason Ekstrand) Add READ_ONCE around rq->engine in lock_sched_engine Signed-off-by: Matthew Brost Reviewed-by: Jason Ekstrand Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-8-matthew.brost@intel.com --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 11 ++++-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 46 +++++++++++----------- drivers/gpu/drm/i915/i915_scheduler.h | 2 +- 4 files changed, 32 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 9487d9e0be62..ffad4d98cec0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -382,7 +382,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); + pl = i915_sched_lookup_priolist(engine->sched_engine, + prio); } GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); @@ -1096,7 +1097,8 @@ static void defer_active(struct intel_engine_cs *engine) if (!rq) return; - defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); + defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine, + rq_prio(rq))); } static bool @@ -2083,7 +2085,7 @@ static void __execlists_unhold(struct i915_request *rq) i915_request_clear_hold(rq); list_move_tail(&rq->sched.link, - i915_sched_lookup_priolist(rq->engine, + i915_sched_lookup_priolist(rq->engine->sched_engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -2452,7 +2454,8 @@ static void queue_request(struct intel_engine_cs *engine, { GEM_BUG_ON(!list_empty(&rq->sched.link)); list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, rq_prio(rq))); + i915_sched_lookup_priolist(engine->sched_engine, + rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 60121809e6e2..cb13cc586c67 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -503,7 +503,7 @@ static inline void queue_request(struct intel_engine_cs *engine, { GEM_BUG_ON(!list_empty(&rq->sched.link));
list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, prio)); + i915_sched_lookup_priolist(engine->sched_engine, prio)); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 035b88f2e4aa..fa8863df9513 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -61,14 +61,13 @@ static void assert_priolists(struct i915_sched_engine * const sched_engine) } struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) +i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio) { - struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; - lockdep_assert_held(&engine->sched_engine->lock); + lockdep_assert_held(&sched_engine->lock); assert_priolists(sched_engine); if (unlikely(sched_engine->no_priolist)) @@ -130,13 +129,13 @@ struct sched_cache { struct list_head *priolist; }; -static struct intel_engine_cs * -sched_lock_engine(const struct i915_sched_node *node, - struct intel_engine_cs *locked, +static struct i915_sched_engine * +lock_sched_engine(struct i915_sched_node *node, + struct i915_sched_engine *locked, struct sched_cache *cache) { const struct i915_request *rq = node_to_request(node); - struct intel_engine_cs *engine; + struct i915_sched_engine *sched_engine; GEM_BUG_ON(!locked); @@ -146,14 +145,14 @@ sched_lock_engine(const struct i915_sched_node *node, * engine lock. The simple ploy we use is to take the lock then * check that the rq still belongs to the newly locked engine. */ - while (locked != (engine = READ_ONCE(rq->engine))) { - spin_unlock(&locked->sched_engine->lock); + while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) { + spin_unlock(&locked->lock); memset(cache, 0, sizeof(*cache)); - spin_lock(&engine->sched_engine->lock); - locked = engine; + spin_lock(&sched_engine->lock); + locked = sched_engine; } - GEM_BUG_ON(locked != engine); + GEM_BUG_ON(locked != sched_engine); return locked; } @@ -161,7 +160,7 @@ static void __i915_schedule(struct i915_sched_node *node, const struct i915_sched_attr *attr) { const int prio = max(attr->priority, node->attr.priority); - struct intel_engine_cs *engine; + struct i915_sched_engine *sched_engine; struct i915_dependency *dep, *p; struct i915_dependency stack; struct sched_cache cache; @@ -236,23 +235,24 @@ static void __i915_schedule(struct i915_sched_node *node, } memset(&cache, 0, sizeof(cache)); - engine = node_to_request(node)->engine; - spin_lock(&engine->sched_engine->lock); + sched_engine = node_to_request(node)->engine->sched_engine; + spin_lock(&sched_engine->lock); /* Fifo and depth-first replacement ensure our deps execute before us */ - engine = sched_lock_engine(node, engine, &cache); + sched_engine = lock_sched_engine(node, sched_engine, &cache); list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { INIT_LIST_HEAD(&dep->dfs_link); node = dep->signaler; - engine = sched_lock_engine(node, engine, &cache); - lockdep_assert_held(&engine->sched_engine->lock); + sched_engine = lock_sched_engine(node, sched_engine, &cache); + lockdep_assert_held(&sched_engine->lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) continue; - GEM_BUG_ON(node_to_request(node)->engine != engine); + GEM_BUG_ON(node_to_request(node)->engine->sched_engine != + sched_engine); 
WRITE_ONCE(node->attr.priority, prio); @@ -270,17 +270,17 @@ static void __i915_schedule(struct i915_sched_node *node, if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = - i915_sched_lookup_priolist(engine, + i915_sched_lookup_priolist(sched_engine, prio); list_move_tail(&node->link, cache.priolist); } /* Defer (tasklet) submission until after all of our updates. */ - if (engine->sched_engine->kick_backend) - engine->sched_engine->kick_backend(node_to_request(node), prio); + if (sched_engine->kick_backend) + sched_engine->kick_backend(node_to_request(node), prio); } - spin_unlock(&engine->sched_engine->lock); + spin_unlock(&sched_engine->lock); } void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 713c38c99de9..0014745bda30 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -39,7 +39,7 @@ void i915_schedule(struct i915_request *request, const struct i915_sched_attr *attr); struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); +i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio); void __i915_priolist_free(struct i915_priolist *p); static inline void i915_priolist_free(struct i915_priolist *p) -- cgit v1.2.3 From 22916bad07a5d42dbb6797f4f0c87a0a5842ec6c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:38 -0700 Subject: drm/i915: Move submission tasklet to i915_sched_engine The submission tasklet operates on i915_sched_engine, thus it is the correct place for it. v3: (Jason Ekstrand) Change sched_engine->engine to a void* private data pointer Add kernel doc v4: (Daniele) Update private_data comment Set queue_priority_hint in kick_execlists v5: (CI) Rebase and fix build error Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-9-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 14 ---- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 -- .../gpu/drm/i915/gt/intel_execlists_submission.c | 84 ++++++++++++---------- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + drivers/gpu/drm/i915/gt/selftest_execlists.c | 14 ++-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 6 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 25 +++---- drivers/gpu/drm/i915/i915_scheduler.c | 1 + drivers/gpu/drm/i915/i915_scheduler.h | 14 ++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 10 +++ 13 files changed, 100 insertions(+), 90 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 45ee08fc40a1..f911c1224ab2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -125,20 +125,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -static inline void -execlists_active_lock_bh(struct intel_engine_execlists *execlists) -{ - local_bh_disable(); /* prevent local softirq and lock recursion */ - tasklet_lock(&execlists->tasklet); -} - -static inline void -execlists_active_unlock_bh(struct intel_engine_execlists *execlists) -{ - tasklet_unlock(&execlists->tasklet); - local_bh_enable(); /* restore 
softirq, and kick ksoftirqd! */ -} - struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 8e0a8ee955af..88694822716a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -713,6 +713,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) err = -ENOMEM; goto err_sched_engine; } + engine->sched_engine->private_data = engine; err = intel_engine_init_cmd_parser(engine); if (err) @@ -944,7 +945,6 @@ int intel_engines_init(struct intel_gt *gt) void intel_engine_cleanup_common(struct intel_engine_cs *engine) { GEM_BUG_ON(!list_empty(&engine->sched_engine->requests)); - tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ i915_sched_engine_put(engine->sched_engine); intel_breadcrumbs_free(engine->breadcrumbs); @@ -1193,7 +1193,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->sched_engine->tasklet; if (!t->callback) return; @@ -1454,8 +1454,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + &engine->sched_engine->tasklet.state)), + enableddisabled(!atomic_read(&engine->sched_engine->tasklet.count)), repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); @@ -1479,7 +1479,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - execlists_active_lock_bh(execlists); + i915_sched_engine_active_lock_bh(engine->sched_engine); rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[160]; @@ -1510,7 +1510,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); - execlists_active_unlock_bh(execlists); + i915_sched_engine_active_unlock_bh(engine->sched_engine); } else if (GRAPHICS_VER(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 0bb65c57d274..5b91068ab277 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -138,11 +138,6 @@ struct st_preempt_hang { * driver and the hardware state for execlist mode of submission. 
*/ struct intel_engine_execlists { - /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - /** * @timer: kick the current context if its timeslice expires */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index ffad4d98cec0..cdb2126a159a 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -570,7 +570,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) resubmit_virtual_request(rq, ve); if (READ_ONCE(ve->request)) - tasklet_hi_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.sched_engine->tasklet); } static void __execlists_schedule_out(struct i915_request * const rq, @@ -739,9 +739,9 @@ trace_ports(const struct intel_engine_execlists *execlists, } static bool -reset_in_progress(const struct intel_engine_execlists *execlists) +reset_in_progress(const struct intel_engine_cs *engine) { - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); + return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet)); } static __maybe_unused noinline bool @@ -757,7 +757,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); /* We may be messing around with the lists during reset, lalala */ - if (reset_in_progress(execlists)) + if (reset_in_progress(engine)) return true; if (!execlists->pending[0]) { @@ -1190,7 +1190,7 @@ static void start_timeslice(struct intel_engine_cs *engine) * its timeslice, so recheck. */ if (!timer_pending(&el->timer)) - tasklet_hi_schedule(&el->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); return; } @@ -1772,8 +1772,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * access. Either we are inside the tasklet, or the tasklet is disabled * and we assume that is only inside the reset paths and so serialised. */ - GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && - !reset_in_progress(execlists)); + GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) && + !reset_in_progress(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
@@ -2131,7 +2131,7 @@ static void execlists_unhold(struct intel_engine_cs *engine, if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { engine->sched_engine->queue_priority_hint = rq_prio(rq); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } spin_unlock_irq(&engine->sched_engine->lock); @@ -2322,13 +2322,13 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "reset for %s\n", msg); /* Mark this tasklet as disabled to avoid waiting for it to complete */ - tasklet_disable_nosync(&engine->execlists.tasklet); + tasklet_disable_nosync(&engine->sched_engine->tasklet); ring_set_paused(engine, 1); /* Freeze the current request in place */ execlists_capture(engine); intel_engine_reset(engine, msg); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); clear_and_wake_up_bit(bit, lock); } @@ -2351,8 +2351,9 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) */ static void execlists_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; struct i915_request *post[2 * EXECLIST_MAX_PORTS]; struct i915_request **inactive; @@ -2427,13 +2428,16 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) intel_engine_signal_breadcrumbs(engine); if (tasklet) - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } static void __execlists_kick(struct intel_engine_execlists *execlists) { + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } #define execlists_kick(t, member) \ @@ -2808,10 +2812,8 @@ static int execlists_resume(struct intel_engine_cs *engine) static void execlists_reset_prepare(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - ENGINE_TRACE(engine, "depth<-%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2822,8 +2824,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - __tasklet_disable_sync_once(&execlists->tasklet); - GEM_BUG_ON(!reset_in_progress(execlists)); + __tasklet_disable_sync_once(&engine->sched_engine->tasklet); + GEM_BUG_ON(!reset_in_progress(engine)); /* * We stop engines, otherwise we might get failed reset and a @@ -2973,8 +2975,9 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) static void nop_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; /* The driver is wedged; don't process any more events. 
*/ WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); @@ -3061,8 +3064,8 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) sched_engine->queue_priority_hint = INT_MIN; sched_engine->queue = RB_ROOT_CACHED; - GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.callback = nop_submission_tasklet; + GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet)); + engine->sched_engine->tasklet.callback = nop_submission_tasklet; spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); @@ -3082,14 +3085,14 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * reset as the next level of recovery, and as a final resort we * will declare the device wedged. */ - GEM_BUG_ON(!reset_in_progress(execlists)); + GEM_BUG_ON(!reset_in_progress(engine)); /* And kick in case we missed a new request submission. */ - if (__tasklet_enable(&execlists->tasklet)) + if (__tasklet_enable(&engine->sched_engine->tasklet)) __execlists_kick(execlists); ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); } static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) @@ -3163,7 +3166,7 @@ static void kick_execlists(const struct i915_request *rq, int prio) * so kiss. */ if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&sched_engine->tasklet); unlock: rcu_read_unlock(); @@ -3174,7 +3177,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = execlists_submit_request; engine->sched_engine->schedule = i915_schedule; engine->sched_engine->kick_backend = kick_execlists; - engine->execlists.tasklet.callback = execlists_submission_tasklet; + engine->sched_engine->tasklet.callback = execlists_submission_tasklet; } static void execlists_shutdown(struct intel_engine_cs *engine) @@ -3182,7 +3185,7 @@ static void execlists_shutdown(struct intel_engine_cs *engine) /* Synchronise with residual timers and any softirq they raise */ del_timer_sync(&engine->execlists.timer); del_timer_sync(&engine->execlists.preempt); - tasklet_kill(&engine->execlists.tasklet); + tasklet_kill(&engine->sched_engine->tasklet); } static void execlists_release(struct intel_engine_cs *engine) @@ -3298,7 +3301,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) struct intel_uncore *uncore = engine->uncore; u32 base = engine->mmio_base; - tasklet_setup(&engine->execlists.tasklet, execlists_submission_tasklet); + tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet); timer_setup(&engine->execlists.timer, execlists_timeslice, 0); timer_setup(&engine->execlists.preempt, execlists_preempt, 0); @@ -3380,7 +3383,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) * rbtrees as in the case it is running in parallel, it may reinsert * the rb_node into a sibling. */ - tasklet_kill(&ve->base.execlists.tasklet); + tasklet_kill(&ve->base.sched_engine->tasklet); /* Decouple ourselves from the siblings, no more access allowed. 
*/ for (n = 0; n < ve->num_siblings; n++) { @@ -3392,13 +3395,13 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) spin_lock_irq(&sibling->sched_engine->lock); - /* Detachment is lazily performed in the execlists tasklet */ + /* Detachment is lazily performed in the sched_engine->tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); spin_unlock_irq(&sibling->sched_engine->lock); } - GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet)); GEM_BUG_ON(!list_empty(virtual_queue(ve))); lrc_fini(&ve->context); @@ -3545,9 +3548,11 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) static void virtual_submission_tasklet(struct tasklet_struct *t) { + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); struct virtual_engine * const ve = - from_tasklet(ve, t, base.execlists.tasklet); - const int prio = READ_ONCE(ve->base.sched_engine->queue_priority_hint); + (struct virtual_engine *)sched_engine->private_data; + const int prio = READ_ONCE(sched_engine->queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -3616,7 +3621,7 @@ submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; if (first && prio > sibling->sched_engine->queue_priority_hint) - tasklet_hi_schedule(&sibling->execlists.tasklet); + tasklet_hi_schedule(&sibling->sched_engine->tasklet); unlock_engine: spin_unlock_irq(&sibling->sched_engine->lock); @@ -3657,7 +3662,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(!list_empty(virtual_queue(ve))); list_move_tail(&rq->sched.link, virtual_queue(ve)); - tasklet_hi_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.sched_engine->tasklet); unlock: spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); @@ -3751,6 +3756,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, err = -ENOMEM; goto err_put; } + ve->base.sched_engine->private_data = &ve->base; ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ -3761,7 +3767,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); - tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet); + tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet); intel_context_init(&ve->context, &ve->base); @@ -3789,7 +3795,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, * layering if we handle cloning of the requests and * submitting a copy into each backend. 
*/ - if (sibling->execlists.tasklet.callback != + if (sibling->sched_engine->tasklet.callback != execlists_submission_tasklet) { err = -ENODEV; goto err_put; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index a49fd3039f13..68970398e4ef 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -349,6 +349,7 @@ int mock_engine_init(struct intel_engine_cs *engine) engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); if (!engine->sched_engine) return -ENOMEM; + engine->sched_engine->private_data = engine; intel_engine_init_execlists(engine); intel_engine_init__pm(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 72f8cc914f4f..08896ae027d5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -43,7 +43,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); timeout += jiffies; do { @@ -553,13 +553,13 @@ static int live_pin_rewind(void *arg) static int engine_lock_reset_tasklet(struct intel_engine_cs *engine) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->sched_engine->tasklet); local_bh_disable(); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, &engine->gt->reset.flags)) { local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); intel_gt_set_wedged(engine->gt); return -EBUSY; @@ -574,7 +574,7 @@ static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine) &engine->gt->reset.flags); local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); } static int live_hold_reset(void *arg) @@ -628,7 +628,7 @@ static int live_hold_reset(void *arg) if (err) goto out; - engine->execlists.tasklet.callback(&engine->execlists.tasklet); + engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); i915_request_get(rq); @@ -1200,7 +1200,7 @@ static int live_timeslice_rewind(void *arg) while (i915_request_is_active(rq[A2])) { /* semaphore yield! 
*/ /* Wait for the timeslice to kick in */ del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); intel_engine_flush_submission(engine); } /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ @@ -4606,7 +4606,7 @@ static int reset_virtual_engine(struct intel_gt *gt, if (err) goto out_heartbeat; - engine->execlists.tasklet.callback(&engine->execlists.tasklet); + engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Fake a preemption event; failed of course */ diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index cec4b9977c9b..6a0b04bdac58 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1702,7 +1702,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p, const char *mode) { - struct tasklet_struct * const t = &engine->execlists.tasklet; + struct tasklet_struct * const t = &engine->sched_engine->tasklet; int err; GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3119016d9910..b0977a3b699b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -49,7 +49,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); timeout += jiffies; do { @@ -1613,12 +1613,12 @@ static void garbage_reset(struct intel_engine_cs *engine, local_bh_disable(); if (!test_and_set_bit(bit, lock)) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->sched_engine->tasklet); if (!rq->fence.error) __intel_engine_reset_bh(engine, NULL); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); clear_and_wake_up_bit(bit, lock); } local_bh_enable(); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 8784257ec808..7a50c9f4071b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -321,7 +321,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->sched_engine->tasklet; if (t->func) tasklet_disable(t); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cb13cc586c67..e9c237b18692 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -241,8 +241,9 @@ done: static void guc_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port, *rq; unsigned long flags; @@ -272,14 +273,12 @@ static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) { if (iir & GT_RENDER_USER_INTERRUPT) { 
intel_engine_signal_breadcrumbs(engine); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } } static void guc_reset_prepare(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - ENGINE_TRACE(engine, "\n"); /* @@ -291,7 +290,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine) * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - __tasklet_disable_sync_once(&execlists->tasklet); + __tasklet_disable_sync_once(&engine->sched_engine->tasklet); } static void guc_reset_state(struct intel_context *ce, @@ -395,14 +394,12 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) static void guc_reset_finish(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (__tasklet_enable(&execlists->tasklet)) + if (__tasklet_enable(&engine->sched_engine->tasklet)) /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); } /* @@ -520,7 +517,7 @@ static void guc_submit_request(struct i915_request *rq) GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); GEM_BUG_ON(list_empty(&rq->sched.link)); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } @@ -600,7 +597,7 @@ static void guc_release(struct intel_engine_cs *engine) { engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ - tasklet_kill(&engine->execlists.tasklet); + tasklet_kill(&engine->sched_engine->tasklet); intel_engine_cleanup_common(engine); lrc_fini_wa_ctx(engine); @@ -679,7 +676,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) */ GEM_BUG_ON(GRAPHICS_VER(i915) < 11); - tasklet_setup(&engine->execlists.tasklet, guc_submission_tasklet); + tasklet_setup(&engine->sched_engine->tasklet, guc_submission_tasklet); guc_default_vfuncs(engine); guc_default_irqs(engine); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index fa8863df9513..3a58a9130309 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -436,6 +436,7 @@ void i915_sched_engine_free(struct kref *kref) struct i915_sched_engine *sched_engine = container_of(kref, typeof(*sched_engine), ref); + tasklet_kill(&sched_engine->tasklet); /* flush the callback */ kfree(sched_engine); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 0014745bda30..650ab8e0db9f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -79,6 +79,20 @@ i915_sched_engine_reset_on_empty(struct i915_sched_engine *sched_engine) sched_engine->no_priolist = false; } +static inline void +i915_sched_engine_active_lock_bh(struct i915_sched_engine *sched_engine) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&sched_engine->tasklet); +} + +static inline void +i915_sched_engine_active_unlock_bh(struct i915_sched_engine *sched_engine) +{ + tasklet_unlock(&sched_engine->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 8bd07d0c27e1..5935c3152bdc 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -124,6 +124,11 @@ struct i915_sched_engine { */ struct list_head hold; + /** + * @tasklet: softirq tasklet for submission + */ + struct tasklet_struct tasklet; + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ @@ -153,6 +158,11 @@ struct i915_sched_engine { */ bool no_priolist; + /** + * @private_data: private data of the submission backend + */ + void *private_data; + /** * @kick_backend: kick backend after a request's priority has changed */ -- cgit v1.2.3 From 088b4d4a48ee97e993fc6b4381d23776f0782bc6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 15 Jun 2021 17:13:00 -0700 Subject: drm/i915/guc: Introduce unified HXG messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New GuC firmware will unify format of MMIO and CTB H2G messages. Introduce their definitions now to allow gradual transition of our code to match new changes. Signed-off-by: Matthew Brost Signed-off-by: Michal Wajdeczko Cc: Michał Winiarski Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210616001302.84233-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h | 213 ++++++++++++++++++++++ 1 file changed, 213 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h index 775e21f3058c..29ac823acd4c 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h @@ -6,6 +6,219 @@ #ifndef _ABI_GUC_MESSAGES_ABI_H #define _ABI_GUC_MESSAGES_ABI_H +/** + * DOC: HXG Message + * + * All messages exchanged with GuC are defined using 32 bit dwords. + * First dword is treated as a message header. Remaining dwords are optional. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | | | | + * | 0 | 31 | **ORIGIN** - originator of the message | + * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 | + * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | **TYPE** - message type | + * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | + * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **PAYLOAD** - optional payload (depends on TYPE) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_MSG_MIN_LEN 1u +#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_ORIGIN_HOST 0u +#define GUC_HXG_ORIGIN_GUC 1u +#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_TYPE_REQUEST 0u +#define GUC_HXG_TYPE_EVENT 1u +#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u +#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u +#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u +#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u +#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) + +/** + * DOC: HXG Request + * + * The `HXG Request`_ message should be used to initiate synchronous activity + * for which confirmation or return data is expected. + * + * The recipient of this message shall use `HXG Response`_, `HXG Failure`_ + * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_ + * message as a intermediate reply. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - request data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - requested action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Event + * + * The `HXG Event`_ message should be used to initiate asynchronous activity + * that does not involves immediate confirmation nor data. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - event data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - event action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional event data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Busy + * + * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_ + * message if the recipient expects that it processing will be longer than default + * timeout. + * + * The @COUNTER field may be used as a progress indicator. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNTER** - progress indicator | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX + +/** + * DOC: HXG Retry + * + * The `HXG Retry`_ message should be used by recipient to indicate that the + * `HXG Request`_ message was dropped and it should be resent again. + * + * The @REASON field may be used to provide additional information. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **REASON** - reason for retry | + * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX +#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u + +/** + * DOC: HXG Failure + * + * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_ + * message that could not be processed due to an error. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **HINT** - additional error hint | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ERROR** - error/result code | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) + +/** + * DOC: HXG Response + * + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ + * message that was successfully processed without an error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX +#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/* deprecated */ #define INTEL_GUC_MSG_TYPE_SHIFT 28 #define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) #define INTEL_GUC_MSG_DATA_SHIFT 16 -- cgit v1.2.3 From 572f2a5cd9742c52f6d4d659409180168a169a24 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 15 Jun 2021 17:13:01 -0700 Subject: drm/i915/guc: Update firmware to v62.0.0 Most of the changes to the 62.0.0 firmware revolved around CTB communication channel. Conform to the new (stable) CTB protocol. 
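Editorial aside, not part of this patch: a minimal sketch of how dword 0 of a host-originated HXG request could be packed and a reply header checked, using FIELD_PREP()/FIELD_GET() from <linux/bitfield.h> together with the GUC_HXG_* masks introduced earlier in this series. The two helper names and the action/data values are invented for the example.

#include <linux/bitfield.h>
#include <linux/types.h>

/* Pack dword 0 of an HXG request: ORIGIN=HOST, TYPE=REQUEST, plus DATA0 and ACTION. */
static u32 example_hxg_request_header(u32 action, u32 data0)
{
	return FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
	       FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, data0) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, action);
}

/* A reply only signals success if it is GuC-originated and of type RESPONSE_SUCCESS. */
static bool example_hxg_reply_is_success(u32 header)
{
	return FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) == GUC_HXG_ORIGIN_GUC &&
	       FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_RESPONSE_SUCCESS;
}

The driver path touched by this patch, intel_guc_send_mmio() below, performs the same origin/type decoding but additionally handles the BUSY, RETRY and FAILURE reply types defined by the HXG scheme.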
v2: (Michal) Add values back to kernel DOC for actions (Docs) Add 'CT buffer' back in to fix warning Signed-off-by: John Harrison Signed-off-by: Michal Wajdeczko Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio [mattrope: Tweaked kerneldoc while pushing as suggested by Daniele/Michal] Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210616001302.84233-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 107 +++++++ .../drm/i915/gt/uc/abi/guc_communication_ctb_abi.h | 126 ++++++-- .../i915/gt/uc/abi/guc_communication_mmio_abi.h | 65 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 107 +++++-- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 45 +-- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 356 ++++++++++----------- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 75 +---- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 29 +- drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 6 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 26 +- 11 files changed, 527 insertions(+), 421 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 90efef8a73e4..2d6198e63ebe 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -6,6 +6,113 @@ #ifndef _ABI_GUC_ACTIONS_ABI_H #define _ABI_GUC_ACTIONS_ABI_H +/** + * DOC: HOST2GUC_REGISTER_CTB + * + * This message is used as part of the `CTB based communication`_ setup. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:12 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **TYPE** - type for the `CT Buffer`_ | + * | | | | + * | | | - _`GUC_CTB_TYPE_HOST2GUC` = 0 | + * | | | - _`GUC_CTB_TYPE_GUC2HOST` = 1 | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_ | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | 
+-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505 + +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) +#define GUC_CTB_TYPE_HOST2GUC 0u +#define GUC_CTB_TYPE_GUC2HOST 1u +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE (0xff << 0) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR GUC_HXG_REQUEST_MSG_n_DATAn + +#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: HOST2GUC_DEREGISTER_CTB + * + * This message is used as part of the `CTB based communication`_ teardown. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:12 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **TYPE** - type of the `CT Buffer`_ | + * | | | | + * | | | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB 0x4506 + +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2 (0xff << 0) + +#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/* legacy definitions */ + enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, INTEL_GUC_ACTION_REQUEST_PREEMPTION = 
0x2, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index d38935f47ecf..e933ca02d0eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -7,6 +7,110 @@ #define _ABI_GUC_COMMUNICATION_CTB_ABI_H #include +#include + +#include "guc_messages_abi.h" + +/** + * DOC: CT Buffer + * + * Circular buffer used to send `CTB Message`_ + */ + +/** + * DOC: CTB Descriptor + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was | + * | | | read from the `CT Buffer`_. | + * | | | It can only be updated by the receiver. | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was | + * | | | written to the `CT Buffer`_. | + * | | | It can only be updated by the sender. | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **STATUS** - status of the CTB | + * | | | | + * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) | + * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | + * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | + * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * +---+-------+--------------------------------------------------------------+ + * |...| | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 15| 31:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ + +struct guc_ct_buffer_desc { + u32 head; + u32 tail; + u32 status; +#define GUC_CTB_STATUS_NO_ERROR 0 +#define GUC_CTB_STATUS_OVERFLOW (1 << 0) +#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) +#define GUC_CTB_STATUS_MISMATCH (1 << 2) + u32 reserved[13]; +} __packed; +static_assert(sizeof(struct guc_ct_buffer_desc) == 64); + +/** + * DOC: CTB Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **FENCE** - message identifier | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | **FORMAT** - format of the CTB message | + * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **RESERVED** | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | optional (depends on FORMAT) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_MSG_MIN_LEN 1u +#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_FORMAT_HXG 0u +#define GUC_CTB_MSG_0_RESERVED (0xf << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) + +/** + * DOC: CTB HXG Message + * + * 
+---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | FENCE | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | +--------------------------------------------------------+ | + * +---+-------+ | | | + * |...| | | Embedded `HXG Message`_ | | + * +---+-------+ | | | + * | n | 31:0 | +--------------------------------------------------------+ | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN) +#define GUC_CTB_HXG_MSG_MAX_LEN GUC_CTB_MSG_MAX_LEN /** * DOC: CTB based communication @@ -60,28 +164,6 @@ * - **flags**, holds various bits to control message handling */ -/* - * Describes single command transport buffer. - * Used by both guc-master and clients. - */ -struct guc_ct_buffer_desc { - u32 addr; /* gfx address */ - u64 host_private; /* host private data */ - u32 size; /* size in bytes */ - u32 head; /* offset updated by GuC*/ - u32 tail; /* offset updated by owner */ - u32 is_in_error; /* error indicator */ - u32 reserved1; - u32 reserved2; - u32 owner; /* id of the channel owner */ - u32 owner_sub_id; /* owner-defined field for extra tracking */ - u32 reserved[5]; -} __packed; - -/* Type of command transport buffer */ -#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u -#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u - /* * Definition of the command transport message header (DW0) * diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h index be066a62e9e0..bbf1ddb77434 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h @@ -7,46 +7,43 @@ #define _ABI_GUC_COMMUNICATION_MMIO_ABI_H /** - * DOC: MMIO based communication + * DOC: GuC MMIO based communication * - * The MMIO based communication between Host and GuC uses software scratch - * registers, where first register holds data treated as message header, - * and other registers are used to hold message payload. + * The MMIO based communication between Host and GuC relies on special + * hardware registers which format could be defined by the software + * (so called scratch registers). * - * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8, - * but no H2G command takes more than 8 parameters and the GuC FW - * itself uses an 8-element array to store the H2G message. + * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H) + * messages, which maximum length depends on number of available scratch + * registers, is directly written into those scratch registers. * - * +-----------+---------+---------+---------+ - * | MMIO[0] | MMIO[1] | ... 
| MMIO[n] | - * +-----------+---------+---------+---------+ - * | header | optional payload | - * +======+====+=========+=========+=========+ - * | 31:28|type| | | | - * +------+----+ | | | - * | 27:16|data| | | | - * +------+----+ | | | - * | 15:0|code| | | | - * +------+----+---------+---------+---------+ + * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 4 parameters and the GuC firmware + * itself uses an 4-element array to store the H2G message. * - * The message header consists of: + * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which + * are, regardless on lower count, preferred over legacy ones. * - * - **type**, indicates message type - * - **code**, indicates message code, is specific for **type** - * - **data**, indicates message data, optional, depends on **code** - * - * The following message **types** are supported: - * - * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code - * must be priovided in **code** field. Optional action specific parameters - * can be provided in remaining payload registers or **data** field. - * - * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, - * action response status will be provided in **code** field. Optional - * response data can be returned in remaining payload registers or **data** - * field. + * The MMIO based communication is mainly used during driver initialization + * phase to setup the `CTB based communication`_ that will be used afterwards. */ -#define GUC_MAX_MMIO_MSG_LEN 8 +#define GUC_MAX_MMIO_MSG_LEN 4 + +/** + * DOC: MMIO HXG Message + * + * Format of the MMIO messages follows definitions of `HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | +--------------------------------------------------------+ | + * +---+-------+ | | | + * |...| | | Embedded `HXG Message`_ | | + * +---+-------+ | | | + * | n | 31:0 | +--------------------------------------------------------+ | + * +---+-------+--------------------------------------------------------------+ + */ #endif /* _ABI_GUC_COMMUNICATION_MMIO_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f147cb389a20..6661dcb02239 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -219,24 +219,19 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc) BUILD_BUG_ON(!CRASH_BUFFER_SIZE); BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); - BUILD_BUG_ON(!DPC_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DPC_BUFFER_SIZE, UNIT)); - BUILD_BUG_ON(!ISR_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(ISR_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT)); BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DPC_BUFFER_SIZE / UNIT - 1) > - (GUC_LOG_DPC_MASK >> GUC_LOG_DPC_SHIFT)); - BUILD_BUG_ON((ISR_BUFFER_SIZE / UNIT - 1) > - (GUC_LOG_ISR_MASK >> GUC_LOG_ISR_SHIFT)); + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) > + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | FLAG | ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DPC_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DPC_SHIFT) | - ((ISR_BUFFER_SIZE / UNIT - 1) << GUC_LOG_ISR_SHIFT) | + 
((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); #undef UNIT @@ -376,29 +371,27 @@ void intel_guc_fini(struct intel_guc *guc) /* * This function implements the MMIO based host to GuC interface. */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len, u32 *response_buf, u32 response_buf_size) { + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct intel_uncore *uncore = guc_to_gt(guc)->uncore; - u32 status; + u32 header; int i; int ret; GEM_BUG_ON(!len); GEM_BUG_ON(len > guc->send_regs.count); - /* We expect only action code */ - GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != GUC_HXG_ORIGIN_HOST); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != GUC_HXG_TYPE_REQUEST); mutex_lock(&guc->send_mutex); intel_uncore_forcewake_get(uncore, guc->send_regs.fw_domains); +retry: for (i = 0; i < len; i++) - intel_uncore_write(uncore, guc_send_reg(guc, i), action[i]); + intel_uncore_write(uncore, guc_send_reg(guc, i), request[i]); intel_uncore_posting_read(uncore, guc_send_reg(guc, i - 1)); @@ -410,30 +403,74 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, */ ret = __intel_wait_for_register_fw(uncore, guc_send_reg(guc, 0), - INTEL_GUC_MSG_TYPE_MASK, - INTEL_GUC_MSG_TYPE_RESPONSE << - INTEL_GUC_MSG_TYPE_SHIFT, - 10, 10, &status); - /* If GuC explicitly returned an error, convert it to -EIO */ - if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status)) - ret = -EIO; + GUC_HXG_MSG_0_ORIGIN, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, + GUC_HXG_ORIGIN_GUC), + 10, 10, &header); + if (unlikely(ret)) { +timeout: + drm_err(&i915->drm, "mmio request %#x: no reply %x\n", + request[0], header); + goto out; + } - if (ret) { - DRM_ERROR("MMIO: GuC action %#x failed with error %d %#x\n", - action[0], ret, status); + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { +#define done ({ header = intel_uncore_read(uncore, guc_send_reg(guc, 0)); \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC || \ + FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) + + ret = wait_for(done, 1000); + if (unlikely(ret)) + goto timeout; + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != + GUC_HXG_ORIGIN_GUC)) + goto proto; +#undef done + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); + + drm_dbg(&i915->drm, "mmio request %#x: retrying, reason %u\n", + request[0], reason); + goto retry; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_RESPONSE_FAILURE) { + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + + drm_err(&i915->drm, "mmio request %#x: failure %x/%u\n", + request[0], error, hint); + ret = -ENXIO; + goto out; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { +proto: + drm_err(&i915->drm, "mmio request %#x: unexpected reply %#x\n", + request[0], header); + ret = -EPROTO; goto out; } if (response_buf) { - int count = min(response_buf_size, guc->send_regs.count - 1); + int count = 
min(response_buf_size, guc->send_regs.count); - for (i = 0; i < count; i++) + GEM_BUG_ON(!count); + + response_buf[0] = header; + + for (i = 1; i < count; i++) response_buf[i] = intel_uncore_read(uncore, - guc_send_reg(guc, i + 1)); - } + guc_send_reg(guc, i)); - /* Use data from the GuC response as our return value */ - ret = INTEL_GUC_MSG_TO_DATA(status); + /* Use number of copied dwords as our return value */ + ret = count; + } else { + /* Use data from the GuC response as our return value */ + ret = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); + } out: intel_uncore_forcewake_put(uncore, guc->send_regs.fw_domains); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 9abfbc6edbd6..b82145652d57 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -24,10 +24,6 @@ * +---------------------------------------+ * | guc_gt_system_info | * +---------------------------------------+ - * | guc_clients_info | - * +---------------------------------------+ - * | guc_ct_pool_entry[size] | - * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned * | private data | @@ -39,8 +35,6 @@ struct __guc_ads_blob { struct guc_ads ads; struct guc_policies policies; struct guc_gt_system_info system_info; - struct guc_clients_info clients_info; - struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE]; } __packed; static u32 guc_ads_private_data_size(struct intel_guc *guc) @@ -59,38 +53,15 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) guc_ads_private_data_size(guc); } -static void guc_policy_init(struct guc_policy *policy) -{ - policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; - policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; - policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; - policy->policy_flags = 0; -} - static void guc_policies_init(struct guc_policies *policies) { - struct guc_policy *policy; - u32 p, i; - - policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; - policies->max_num_work_items = POLICY_MAX_NUM_WI; - - for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { - for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) { - policy = &policies->policy[p][i]; - - guc_policy_init(policy); - } - } - + policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; + policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; + /* Disable automatic resets as not yet supported. 
*/ + policies->global_flags = GLOBAL_POLICY_DISABLE_ENGINE_RESET; policies->is_valid = 1; } -static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num) -{ - memset(pool, 0, num * sizeof(*pool)); -} - static void guc_mapping_table_init(struct intel_gt *gt, struct guc_gt_system_info *system_info) { @@ -178,17 +149,9 @@ static void __guc_ads_init(struct intel_guc *guc) base = intel_guc_ggtt_offset(guc, guc->ads_vma); - /* Clients info */ - guc_ct_pool_entries_init(blob->ct_pool, ARRAY_SIZE(blob->ct_pool)); - - blob->clients_info.clients_num = 1; - blob->clients_info.ct_pool_addr = base + ptr_offset(blob, ct_pool); - blob->clients_info.ct_pool_count = ARRAY_SIZE(blob->ct_pool); - /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); - blob->ads.clients_info = base + ptr_offset(blob, clients_info); /* Private Data */ blob->ads.private_data = base + guc_ads_private_data_offset(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 8f7b148fef58..43409044528e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -103,61 +103,66 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) static inline const char *guc_ct_buffer_type_to_str(u32 type) { switch (type) { - case INTEL_GUC_CT_BUFFER_TYPE_SEND: + case GUC_CTB_TYPE_HOST2GUC: return "SEND"; - case INTEL_GUC_CT_BUFFER_TYPE_RECV: + case GUC_CTB_TYPE_GUC2HOST: return "RECV"; default: return ""; } } -static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, - u32 cmds_addr, u32 size) +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) { memset(desc, 0, sizeof(*desc)); - desc->addr = cmds_addr; - desc->size = size; - desc->owner = CTB_OWNER_HOST; } -static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb, u32 cmds_addr) +static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) { - guc_ct_buffer_desc_init(ctb->desc, cmds_addr, ctb->size); + ctb->broken = false; + guc_ct_buffer_desc_init(ctb->desc); } static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, struct guc_ct_buffer_desc *desc, - u32 *cmds, u32 size) + u32 *cmds, u32 size_in_bytes) { - GEM_BUG_ON(size % 4); + GEM_BUG_ON(size_in_bytes % 4); ctb->desc = desc; ctb->cmds = cmds; - ctb->size = size; + ctb->size = size_in_bytes / 4; - guc_ct_buffer_reset(ctb, 0); + guc_ct_buffer_reset(ctb); } -static int guc_action_register_ct_buffer(struct intel_guc *guc, - u32 desc_addr, - u32 type) +static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, + u32 desc_addr, u32 buff_addr, u32 size) { - u32 action[] = { - INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER, - desc_addr, - sizeof(struct guc_ct_buffer_desc), - type + u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) | + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), }; - /* Can't use generic send(), CT registration must go over MMIO */ - return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && 
type != GUC_CTB_TYPE_GUC2HOST); + GEM_BUG_ON(size % SZ_4K); + + /* CT registration must go over MMIO */ + return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); } -static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) +static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, + u32 desc_addr, u32 buff_addr, u32 size) { - int err = guc_action_register_ct_buffer(ct_to_guc(ct), desc_addr, type); + int err; + err = guc_action_register_ct_buffer(ct_to_guc(ct), type, + desc_addr, buff_addr, size); if (unlikely(err)) CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n", guc_ct_buffer_type_to_str(type), err); @@ -166,14 +171,17 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) { - u32 action[] = { - INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, - CTB_OWNER_HOST, - type + u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), + FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), }; - /* Can't use generic send(), CT deregistration must go over MMIO */ - return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); + + /* CT deregistration must go over MMIO */ + return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); } static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) @@ -261,7 +269,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base, cmds; + u32 base, desc, cmds; void *blob; int err; @@ -277,23 +285,26 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) GEM_BUG_ON(blob != ct->ctbs.send.desc); /* (re)initialize descriptors */ - cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); - guc_ct_buffer_reset(&ct->ctbs.send, cmds); - - cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); - guc_ct_buffer_reset(&ct->ctbs.recv, cmds); + guc_ct_buffer_reset(&ct->ctbs.send); + guc_ct_buffer_reset(&ct->ctbs.recv); /* * Register both CT buffers starting with RECV buffer. * Descriptors are in first half of the blob. 
*/ - err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.recv.desc, blob), - INTEL_GUC_CT_BUFFER_TYPE_RECV); + desc = base + ptrdiff(ct->ctbs.recv.desc, blob); + cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); + err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST, + desc, cmds, ct->ctbs.recv.size * 4); + if (unlikely(err)) goto err_out; - err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.send.desc, blob), - INTEL_GUC_CT_BUFFER_TYPE_SEND); + desc = base + ptrdiff(ct->ctbs.send.desc, blob); + cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); + err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC, + desc, cmds, ct->ctbs.send.size * 4); + if (unlikely(err)) goto err_deregister; @@ -302,7 +313,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) return 0; err_deregister: - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); return err; @@ -321,8 +332,8 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ct->enabled = false; if (intel_guc_is_fw_running(guc)) { - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_SEND); - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC); + ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); } } @@ -354,24 +365,6 @@ static void write_barrier(struct intel_guc_ct *ct) } } -/** - * DOC: CTB Host to GuC request - * - * Format of the CTB Host to GuC request message is as follows:: - * - * +------------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | ... | [n-1] | - * +------------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+ - * | | 0 | 1 | ... | n | - * +============+=========+=========+=========+=========+ - * | len >= 1 | FENCE | request specific data | - * +------+-----+---------+---------+---------+---------+ - * - * ^-----------------len-------------------^ - */ - static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, @@ -384,20 +377,22 @@ static int ct_write(struct intel_guc_ct *ct, u32 size = ctb->size; u32 used; u32 header; + u32 hxg; u32 *cmds = ctb->cmds; unsigned int i; - if (unlikely(desc->is_in_error)) + if (unlikely(ctb->broken)) return -EPIPE; - if (unlikely(!IS_ALIGNED(head | tail, 4) || - (tail | head) >= size)) + if (unlikely(desc->status)) goto corrupted; - /* later calculations will be done in dwords */ - head /= 4; - tail /= 4; - size /= 4; + if (unlikely((tail | head) >= size)) { + CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n", + head, tail, size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + goto corrupted; + } /* * tail == head condition indicates empty. GuC FW does not support @@ -413,22 +408,25 @@ static int ct_write(struct intel_guc_ct *ct, return -ENOSPC; /* - * Write the message. 
The format is the following: - * DW0: header (including action code) - * DW1: fence - * DW2+: action data + * dw0: CT header (including fence) + * dw1: HXG header (including action code) + * dw2+: action data */ - header = (len << GUC_CT_MSG_LEN_SHIFT) | - GUC_CT_MSG_SEND_STATUS | - (action[0] << GUC_CT_MSG_ACTION_SHIFT); + header = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | + FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); - CT_DEBUG(ct, "writing %*ph %*ph %*ph\n", - 4, &header, 4, &fence, 4 * (len - 1), &action[1]); + hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | + GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); + + CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", + tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]); cmds[tail] = header; tail = (tail + 1) % size; - cmds[tail] = fence; + cmds[tail] = hxg; tail = (tail + 1) % size; for (i = 1; i < len; i++) { @@ -443,14 +441,15 @@ static int ct_write(struct intel_guc_ct *ct, */ write_barrier(ct); - /* now update desc tail (back in bytes) */ - desc->tail = tail * 4; + /* now update descriptor */ + WRITE_ONCE(desc->tail, tail); + return 0; corrupted: - CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", - desc->addr, desc->head, desc->tail, desc->size); - desc->is_in_error = 1; + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", + desc->head, desc->tail, desc->status); + ctb->broken = true; return -EPIPE; } @@ -477,7 +476,9 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) * up to that length of time, then switch to a slower sleep-wait loop. * No GuC command should ever take longer than 10ms. */ -#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)) +#define done \ + (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ + GUC_HXG_ORIGIN_GUC) err = wait_for_us(done, 10); if (err) err = wait_for(done, 10); @@ -532,21 +533,21 @@ static int ct_send(struct intel_guc_ct *ct, if (unlikely(err)) goto unlink; - if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; goto unlink; } if (response_buf) { /* There shall be no data in the status */ - WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status)); + WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status)); /* Return actual response len */ err = request.response_len; } else { /* There shall be no response payload */ WARN_ON(request.response_len); /* Return data decoded from the status dword */ - err = INTEL_GUC_MSG_TO_DATA(*status); + err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status); } unlink: @@ -583,21 +584,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, return ret; } -static inline unsigned int ct_header_get_len(u32 header) -{ - return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK; -} - -static inline unsigned int ct_header_get_action(u32 header) -{ - return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK; -} - -static inline bool ct_header_is_response(u32 header) -{ - return !!(header & GUC_CT_MSG_IS_RESPONSE); -} - static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords) { struct ct_incoming_msg *msg; @@ -630,17 +616,18 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) unsigned int i; u32 header; - if (unlikely(desc->is_in_error)) + if (unlikely(ctb->broken)) return -EPIPE; - if (unlikely(!IS_ALIGNED(head | tail, 4) || - (tail | head) 
>= size)) + if (unlikely(desc->status)) goto corrupted; - /* later calculations will be done in dwords */ - head /= 4; - tail /= 4; - size /= 4; + if (unlikely((tail | head) >= size)) { + CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n", + head, tail, size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + goto corrupted; + } /* tail == head condition indicates empty */ available = tail - head; @@ -659,7 +646,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) head = (head + 1) % size; /* message len with header */ - len = ct_header_get_len(header) + 1; + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) + GUC_CTB_MSG_MIN_LEN; if (unlikely(len > (u32)available)) { CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n", 4, &header, @@ -667,6 +654,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) size - head : available - 1), &cmds[head], 4 * (head + available - 1 > size ? available - 1 - size + head : 0), &cmds[0]); + desc->status |= GUC_CTB_STATUS_UNDERFLOW; goto corrupted; } @@ -689,65 +677,36 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) } CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg); - desc->head = head * 4; + /* now update descriptor */ + WRITE_ONCE(desc->head, head); + return available - len; corrupted: - CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", - desc->addr, desc->head, desc->tail, desc->size); - desc->is_in_error = 1; + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", + desc->head, desc->tail, desc->status); + ctb->broken = true; return -EPIPE; } -/** - * DOC: CTB GuC to Host response - * - * Format of the CTB GuC to Host response message is as follows:: - * - * +------------+---------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | [3] | ... | [n-1] | - * +------------+---------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+---------+ - * | | 0 | 1 | 2 | ... 
| n | - * +============+=========+=========+=========+=========+=========+ - * | len >= 2 | FENCE | STATUS | response specific data | - * +------+-----+---------+---------+---------+---------+---------+ - * - * ^-----------------------len-----------------------^ - */ - static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response) { - u32 header = response->msg[0]; - u32 len = ct_header_get_len(header); - u32 fence; - u32 status; - u32 datalen; + u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]); + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, response->msg[0]); + const u32 *hxg = &response->msg[GUC_CTB_MSG_MIN_LEN]; + const u32 *data = &hxg[GUC_HXG_MSG_MIN_LEN]; + u32 datalen = len - GUC_HXG_MSG_MIN_LEN; struct ct_request *req; unsigned long flags; bool found = false; int err = 0; - GEM_BUG_ON(!ct_header_is_response(header)); - - /* Response payload shall at least include fence and status */ - if (unlikely(len < 2)) { - CT_ERROR(ct, "Corrupted response (len %u)\n", len); - return -EPROTO; - } - - fence = response->msg[1]; - status = response->msg[2]; - datalen = len - 2; - - /* Format of the status follows RESPONSE message */ - if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { - CT_ERROR(ct, "Corrupted response (status %#x)\n", status); - return -EPROTO; - } + GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS && + FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE); - CT_DEBUG(ct, "response fence %u status %#x\n", fence, status); + CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]); spin_lock_irqsave(&ct->requests.lock, flags); list_for_each_entry(req, &ct->requests.pending, link) { @@ -763,9 +722,9 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r err = -EMSGSIZE; } if (datalen) - memcpy(req->response_buf, response->msg + 3, 4 * datalen); + memcpy(req->response_buf, data, 4 * datalen); req->response_len = datalen; - WRITE_ONCE(req->status, status); + WRITE_ONCE(req->status, hxg[0]); found = true; break; } @@ -786,14 +745,16 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { struct intel_guc *guc = ct_to_guc(ct); - u32 header, action, len; + const u32 *hxg; const u32 *payload; + u32 hxg_len, action, len; int ret; - header = request->msg[0]; - payload = &request->msg[1]; - action = ct_header_get_action(header); - len = ct_header_get_len(header); + hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; + hxg_len = request->size - GUC_CTB_MSG_MIN_LEN; + payload = &hxg[GUC_HXG_MSG_MIN_LEN]; + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); + len = hxg_len - GUC_HXG_MSG_MIN_LEN; CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload); @@ -855,29 +816,12 @@ static void ct_incoming_request_worker_func(struct work_struct *w) queue_work(system_unbound_wq, &ct->requests.worker); } -/** - * DOC: CTB GuC to Host request - * - * Format of the CTB GuC to Host request message is as follows:: - * - * +------------+---------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | [3] | ... | [n-1] | - * +------------+---------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+---------+ - * | | 0 | 1 | 2 | ... 
| n | - * +============+=========+=========+=========+=========+=========+ - * | len | request specific data | - * +------+-----+---------+---------+---------+---------+---------+ - * - * ^-----------------------len-----------------------^ - */ - -static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) +static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { + const u32 *hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; unsigned long flags; - GEM_BUG_ON(ct_header_is_response(request->msg[0])); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT); spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); @@ -887,15 +831,53 @@ static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *re return 0; } -static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) { - u32 header = msg->msg[0]; + u32 origin, type; + u32 *hxg; int err; - if (ct_header_is_response(header)) + if (unlikely(msg->size < GUC_CTB_HXG_MSG_MIN_LEN)) + return -EBADMSG; + + hxg = &msg->msg[GUC_CTB_MSG_MIN_LEN]; + + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { + err = -EPROTO; + goto failed; + } + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); + switch (type) { + case GUC_HXG_TYPE_EVENT: + err = ct_handle_event(ct, msg); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + case GUC_HXG_TYPE_RESPONSE_FAILURE: err = ct_handle_response(ct, msg); + break; + default: + err = -EOPNOTSUPP; + } + + if (unlikely(err)) { +failed: + CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n", + ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg); + } + return err; +} + +static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +{ + u32 format = FIELD_GET(GUC_CTB_MSG_0_FORMAT, msg->msg[0]); + int err; + + if (format == GUC_CTB_FORMAT_HXG) + err = ct_handle_hxg(ct, msg); else - err = ct_handle_request(ct, msg); + err = -EOPNOTSUPP; if (unlikely(err)) { CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index cb222f202301..1ae2dde6db93 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -31,13 +31,15 @@ struct intel_guc; * @lock: protects access to the commands buffer and buffer descriptor * @desc: pointer to the buffer descriptor * @cmds: pointer to the commands buffer - * @size: size of the commands buffer + * @size: size of the commands buffer in dwords + * @broken: flag to indicate if descriptor data is broken */ struct intel_guc_ct_buffer { spinlock_t lock; struct guc_ct_buffer_desc *desc; u32 *cmds; u32 size; + bool broken; }; @@ -59,7 +61,7 @@ struct intel_guc_ct { struct tasklet_struct receive_tasklet; struct { - u32 last_fence; /* last fence used to send request */ + u16 last_fence; /* last fence used to send request */ spinlock_t lock; /* protects pending requests list */ struct list_head pending; /* requests waiting for response */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index e9a9d85e2aa3..617ec601648d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -81,10 +81,8 @@ #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) #define GUC_LOG_CRASH_SHIFT 4 #define 
GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) -#define GUC_LOG_DPC_SHIFT 6 -#define GUC_LOG_DPC_MASK (0x7 << GUC_LOG_DPC_SHIFT) -#define GUC_LOG_ISR_SHIFT 9 -#define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT) +#define GUC_LOG_DEBUG_SHIFT 6 +#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 @@ -247,32 +245,14 @@ struct guc_stage_desc { /* Scheduling policy settings */ -/* Reset engine upon preempt failure */ -#define POLICY_RESET_ENGINE (1<<0) -/* Preempt to idle on quantum expiry */ -#define POLICY_PREEMPT_TO_IDLE (1<<1) - -#define POLICY_MAX_NUM_WI 15 -#define POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 -#define POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 -#define POLICY_DEFAULT_PREEMPTION_TIME_US 500000 -#define POLICY_DEFAULT_FAULT_TIME_US 250000 - -struct guc_policy { - /* Time for one workload to execute. (in micro seconds) */ - u32 execution_quantum; - /* Time to wait for a preemption request to completed before issuing a - * reset. (in micro seconds). */ - u32 preemption_time; - /* How much time to allow to run after the first fault is observed. - * Then preempt afterwards. (in micro seconds) */ - u32 fault_time; - u32 policy_flags; - u32 reserved[8]; -} __packed; +#define GLOBAL_POLICY_MAX_NUM_WI 15 + +/* Don't reset an engine upon preemption failure */ +#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0) + +#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 struct guc_policies { - struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINE_CLASSES]; u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; /* In micro seconds. How much time to allow before DPC processing is * called back via interrupt (to prevent DPC queue drain starving). @@ -286,6 +266,7 @@ struct guc_policies { * idle. 
*/ u32 max_num_work_items; + u32 global_flags; u32 reserved[4]; } __packed; @@ -311,29 +292,13 @@ struct guc_gt_system_info { u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; -/* Clients info */ -struct guc_ct_pool_entry { - struct guc_ct_buffer_desc desc; - u32 reserved[7]; -} __packed; - -#define GUC_CT_POOL_SIZE 2 - -struct guc_clients_info { - u32 clients_num; - u32 reserved0[13]; - u32 ct_pool_addr; - u32 ct_pool_count; - u32 reserved[4]; -} __packed; - /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; u32 reserved0; u32 scheduler_policies; u32 gt_system_info; - u32 clients_info; + u32 reserved1; u32 control_data; u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; @@ -344,8 +309,7 @@ struct guc_ads { /* GuC logging structures */ enum guc_log_buffer_type { - GUC_ISR_LOG_BUFFER, - GUC_DPC_LOG_BUFFER, + GUC_DEBUG_LOG_BUFFER, GUC_CRASH_DUMP_LOG_BUFFER, GUC_MAX_LOG_BUFFER }; @@ -414,23 +378,6 @@ struct guc_shared_ctx_data { struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; } __packed; -#define __INTEL_GUC_MSG_GET(T, m) \ - (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) -#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) -#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) -#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) - -#define __INTEL_GUC_MSG_TYPE_IS(T, m) \ - (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) -#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) -#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) - -#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ - (typecheck(u32, (m)) && \ - ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ - ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \ - (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT))) - /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index c36d5eb5bbb9..ac0931f0374b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -197,10 +197,8 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log, static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) { switch (type) { - case GUC_ISR_LOG_BUFFER: - return ISR_BUFFER_SIZE; - case GUC_DPC_LOG_BUFFER: - return DPC_BUFFER_SIZE; + case GUC_DEBUG_LOG_BUFFER: + return DEBUG_BUFFER_SIZE; case GUC_CRASH_DUMP_LOG_BUFFER: return CRASH_BUFFER_SIZE; default: @@ -245,7 +243,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) src_data += PAGE_SIZE; dst_data += PAGE_SIZE; - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { /* * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading @@ -463,21 +461,16 @@ int intel_guc_log_create(struct intel_guc_log *log) * +===============================+ 00B * | Crash dump state header | * +-------------------------------+ 32B - * | DPC state header | + * | Debug state header | * +-------------------------------+ 64B - * | ISR state header | - * +-------------------------------+ 96B * | | * 
+===============================+ PAGE_SIZE (4KB) * | Crash Dump logs | * +===============================+ + CRASH_SIZE - * | DPC logs | - * +===============================+ + DPC_SIZE - * | ISR logs | - * +===============================+ + ISR_SIZE + * | Debug logs | + * +===============================+ + DEBUG_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DPC_BUFFER_SIZE + - ISR_BUFFER_SIZE; + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -675,10 +668,8 @@ static const char * stringify_guc_log_type(enum guc_log_buffer_type type) { switch (type) { - case GUC_ISR_LOG_BUFFER: - return "ISR"; - case GUC_DPC_LOG_BUFFER: - return "DPC"; + case GUC_DEBUG_LOG_BUFFER: + return "DEBUG"; case GUC_CRASH_DUMP_LOG_BUFFER: return "CRASH"; default: @@ -708,7 +699,7 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count); - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", stringify_guc_log_type(type), log->stats[type].flush, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 11fccd0b2294..ac1ee1d5ce10 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -17,12 +17,10 @@ struct intel_guc; #ifdef CONFIG_DRM_I915_DEBUG_GUC #define CRASH_BUFFER_SIZE SZ_2M -#define DPC_BUFFER_SIZE SZ_8M -#define ISR_BUFFER_SIZE SZ_8M +#define DEBUG_BUFFER_SIZE SZ_16M #else #define CRASH_BUFFER_SIZE SZ_8K -#define DPC_BUFFER_SIZE SZ_32K -#define ISR_BUFFER_SIZE SZ_32K +#define DEBUG_BUFFER_SIZE SZ_64K #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index df647c9a8d56..9f23e9de3237 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,19 +48,19 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 49, 0, 1), huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 49, 0, 1), huc_def(skl, 2, 0, 0)) + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 5, 0)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 5, 0)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 5, 0)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0)) #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ -- cgit v1.2.3 From ca319ee9ca6a6ef95143df8d0a57b2941c2a9566 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 18 Jun 2021 23:45:03 +0200 Subject: drm/i915/eb: Fix pagefault disabling in the first slowpath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit ebc0808fa2da0548a78e715858024cb81cd732bc Author: Chris Wilson Date: Tue Oct 18 13:02:51 2016 +0100 drm/i915: Restrict pagefault disabling to just around copy_from_user() we entirely missed that there's a slow path call to eb_relocate_entry (or i915_gem_execbuffer_relocate_entry as it was called back then) which was left fully wrapped by pagefault_disable/enable() calls. Previously any issues with blocking calls were handled by the following code: /* we can't wait for rendering with pagefaults disabled */ if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; Now at this point the prefaulting was still around, which means in normal applications it was very hard to hit this bug. No idea why the regressions in igts weren't caught. Now this all changed big time with 2 patches merged closely together. First commit 2889caa9232109afc8881f29a2205abeb5709d0c Author: Chris Wilson Date: Fri Jun 16 15:05:19 2017 +0100 drm/i915: Eliminate lots of iterations over the execobjects array removes the prefaulting from the first relocation path, pushing it into the first slowpath (of which this patch added a total of 3 escalation levels). 
This would have really quickly uncovered the above bug, were it not for immediately adding duct-tape on top with commit 7dd4f6729f9243bd7046c6f04c107a456bda38eb Author: Chris Wilson Date: Fri Jun 16 15:05:24 2017 +0100 drm/i915: Async GPU relocation processing by pushing all the relocation patching to the gpu if the buffer was busy, which avoided all the possible blocking calls. The entire slowpath was then furthermore ditched in commit 7dc8f1143778a35b190f9413f228b3cf28f67f8d Author: Chris Wilson Date: Wed Mar 11 16:03:10 2020 +0000 drm/i915/gem: Drop relocation slowpath and resurrected in commit fd1500fcd4420eee06e2c7f3aa6067b78ac05871 Author: Maarten Lankhorst Date: Wed Aug 19 16:08:43 2020 +0200 Revert "drm/i915/gem: Drop relocation slowpath", but this did not further impact what's going on. Since pagefault_disable/enable is an atomic section, any sleeping in there is prohibited, and we definitely do that without gpu relocations since we have to wait for the gpu usage to finish before we can patch up the relocations. Reviewed-by: Matthew Auld Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210618214503.1773805-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 394eb40c95b5..3661461bc04e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2071,9 +2071,7 @@ repeat_validate: list_for_each_entry(ev, &eb->relocs, reloc_link) { if (!have_copy) { - pagefault_disable(); err = eb_relocate_vma(eb, ev); - pagefault_enable(); if (err) break; } else { -- cgit v1.2.3 From 4bc2d5747eb00320eb3bcdf4cf603504e638c22f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 23 Jun 2021 15:34:11 +0100 Subject: drm/i915/ttm: fix static warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit warning: symbol 'i915_gem_ttm_obj_ops' was not declared. Should it be static? 
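sparse emits this warning when a symbol has external linkage but no visible declaration in a header, i.e. nothing outside the defining file can legitimately reference it. A minimal, generic illustration of the two usual fixes (hypothetical file and symbol names, not taken from this patch); the hunk below takes the internal-linkage route:

	/* ops.c */
	struct ops { const char *name; };

	/* Fix 1: the table is only used in this file, so give it internal linkage. */
	static const struct ops ttm_ops = { .name = "ttm" };

	/*
	 * Fix 2 (alternative): keep external linkage and declare it in a shared
	 * header, e.g. "extern const struct ops ttm_ops;", so other files can use it.
	 */

	const char *ttm_ops_name(void) { return ttm_ops.name; }
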
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210623143411.293630-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c5deb8b7227c..cf5540c1537b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -730,7 +730,7 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) return drm_vma_node_offset_addr(&obj->base.vma_node); } -const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { +static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", .flags = I915_GEM_OBJECT_HAS_IOMEM, -- cgit v1.2.3 From 0ff375759f64a0b81853d9d9b4c5b5b4b06f4a2c Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 10:42:38 +0200 Subject: drm/i915: Update object placement flags to be mutable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The object ops i915_GEM_OBJECT_HAS_IOMEM and the object I915_BO_ALLOC_STRUCT_PAGE flags are considered immutable by much of our code. Introduce a new mem_flags member to hold these and make sure checks for these flags being set are either done under the object lock or with pages properly pinned. The flags will change during migration under the object lock. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624084240.270219-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 4 +-- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 22 +++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_mman.c | 12 ++++--- drivers/gpu/drm/i915/gem/i915_gem_object.c | 38 ++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 14 ++------ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 27 +++++++++------ drivers/gpu/drm/i915/gem/i915_gem_pages.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 7 ++-- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 4 +-- .../gpu/drm/i915/gem/selftests/huge_gem_object.c | 4 +-- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 5 ++- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 25 +++++++++----- drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 17 files changed, 123 insertions(+), 52 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ce6b664b10aa..13b217f75055 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -177,8 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index d539dffa1554..41d5182cd367 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -71,6 +71,28 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_is_lmem - Whether the object is resident in + * lmem while in the fence signaling critical path. + * @obj: The object to check. + * + * This function is intended to be called from within the fence signaling + * path where the fence keeps the object from being migrated. For example + * during gpu reset or similar. + * + * Return: Whether the object is resident in lmem. + */ +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + +#ifdef CONFIG_LOCKDEP + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true)); +#endif + return mr && (mr->type == INTEL_MEMORY_LOCAL || + mr->type == INTEL_MEMORY_STOLEN_LOCAL); +} + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index ea76fd11ccb0..27a611deba47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,6 +21,8 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 2fd155742bd2..6497a2dbdab9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -684,7 +684,7 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj, if (mmap_type != I915_MMAP_TYPE_GTT && !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return -ENODEV; mmo = mmap_offset_attach(obj, mmap_type, file); @@ -708,7 +708,12 @@ __assign_mmap_offset_handle(struct drm_file *file, if (!obj) return -ENOENT; + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; err = __assign_mmap_offset(obj, mmap_type, offset, file); + i915_gem_object_unlock(obj); +out_put: i915_gem_object_put(obj); return err; } @@ -932,10 +937,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return PTR_ERR(anon); } - vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - - if (i915_gem_object_has_iomem(obj)) - vma->vm_flags |= VM_IO; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; /* * We keep the ref on mmo->obj, not vm_file, but we require diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index cf18c430d51f..07e8ff9a8aae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -475,6 +475,44 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj) return obj->mm.n_placements > 1; } +/** + * i915_gem_object_has_struct_page - Whether the object is page-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the page backing may change under the caller. 
+ * + * Return: True if page-backed, false otherwise. + */ +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) +{ +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE; +} + +/** + * i915_gem_object_has_iomem - Whether the object is iomem-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the iomem backing may change under the caller. + * + * Return: True if iomem-backed, false otherwise. + */ +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) +{ +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_IOMEM; +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7bf4dd46d8d2..ea3224a480c4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -148,7 +148,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) /* * If more than one potential simultaneous locker, assert held. */ -static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj) { /* * Note mm list lookup is protected by @@ -266,17 +266,9 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, return obj->ops->flags & flags; } -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; -} +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj); -static inline bool -i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); -} +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 3a2d9ecf8e03..441f913c87e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -33,10 +33,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) -#define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_MMAP BIT(4) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_NO_MMAP BIT(3) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -201,17 +200,25 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) -#define I915_BO_ALLOC_CPU_CLEAR BIT(3) -#define I915_BO_ALLOC_USER BIT(4) +#define I915_BO_ALLOC_CPU_CLEAR BIT(2) +#define I915_BO_ALLOC_USER BIT(3) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ - I915_BO_ALLOC_STRUCT_PAGE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER) -#define I915_BO_READONLY BIT(5) -#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not release! */ +#define I915_BO_READONLY BIT(4) +#define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ + /** + * @mem_flags - Mutable placement-related flags + * + * These are flags that indicate specifics of the memory region + * the object is currently in. As such they are only stable + * either under the object lock or if the object is pinned. + */ + unsigned int mem_flags; +#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ +#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 086005c1c7ea..f2f850e31b8e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -351,7 +351,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, int err; if (!i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO); assert_object_held(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index be72ad0634ba..7986612f48fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,7 +76,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); /* We're no longer struct page backed */ - obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; + obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d16c4462fda..7aa1c95c7b7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -444,7 +444,7 @@ shmem_pread(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { - if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + if (i915_gem_object_has_struct_page(obj)) i915_gem_object_release_memory_region(obj); fput(obj->base.filp); @@ -513,9 +513,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index cf5540c1537b..e41bb9d6a491 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -732,7 +732,6 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", - .flags = I915_GEM_OBJECT_HAS_IOMEM, .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, @@ -777,6 +776,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init_memory_region(obj, mem); i915_gem_object_make_unshrinkable(obj); obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; + obj->mem_flags |= I915_BO_FLAG_IOMEM; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 4b0acc7eaa27..56edfeff8c02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -510,8 +510,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); + obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 0c8ecfdf5405..f963b8e1e37b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -114,8 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &huge_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dadd485bc52f..ccc67ed1a84b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -167,9 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 44b5de06ce64..607b7d2d4c29 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -831,16 +831,19 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { + bool no_map; + if (type == I915_MMAP_TYPE_GTT && !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) return false; - if (type != I915_MMAP_TYPE_GTT && - 
!i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) - return false; + i915_gem_object_lock(obj, NULL); + no_map = (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_has_iomem(obj)); + i915_gem_object_unlock(obj); - return true; + return !no_map; } static void object_set_placements(struct drm_i915_gem_object *obj, @@ -988,10 +991,16 @@ static const char *repr_mmap_type(enum i915_mmap_type type) } } -static bool can_access(const struct drm_i915_gem_object *obj) +static bool can_access(struct drm_i915_gem_object *obj) { - return i915_gem_object_has_struct_page(obj) || - i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); + bool access; + + i915_gem_object_lock(obj, NULL); + access = i915_gem_object_has_struct_page(obj) || + i915_gem_object_has_iomem(obj); + i915_gem_object_unlock(obj); + + return access; } static int __igt_mmap_access(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 3a6ce87f8b52..d43d8dae0f69 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,14 @@ static int mock_phys_object(void *arg) goto out; } + i915_gem_object_lock(obj, NULL); if (!i915_gem_object_has_struct_page(obj)) { + i915_gem_object_unlock(obj); err = -EINVAL; pr_err("shmem has no struct page\n"); goto out_obj; } - i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); i915_gem_object_unlock(obj); if (err) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index cb182c6d265a..a2c58b54a592 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1039,7 +1039,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (i915_gem_object_is_lmem(vma->obj)) { + } else if (__i915_gem_object_is_lmem(vma->obj)) { struct intel_memory_region *mem = vma->obj->mm.region; dma_addr_t dma; -- cgit v1.2.3 From 3c2b8f326e7f73dd10ae422dc65603a858f6c6b4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 10:42:39 +0200 Subject: drm/i915/ttm: Adjust gem flags and caching settings after a move MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a TTM move or object init we need to update the i915 gem flags and caching settings to reflect the new placement. Currently caching settings are not changed during the lifetime of an object, although that might change moving forward if we run into performance issues or issues with WC system page allocations. Also introduce gpu_binds_iomem() and cpu_maps_iomem() to clean up the various ways we previously used to detect this. Finally, initialize the TTM object reserved to be able to update flags and caching before anyone else gets hold of the object. 
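[Illustrative sketch, not part of the patch; the helper name is hypothetical.] Once flags and caching are adjusted after a move, callers are expected to query the mutable mem_flags through the accessors introduced earlier in this series, under the object lock, rather than relying on static object-ops flags:

static bool example_backed_by_iomem(struct drm_i915_gem_object *obj)
{
	bool iomem;

	/* mem_flags is only stable under the object lock or while pinned */
	i915_gem_object_lock(obj, NULL);
	iomem = i915_gem_object_has_iomem(obj);
	i915_gem_object_unlock(obj);

	return iomem;
}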
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624084240.270219-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 144 ++++++++++++++++++++++++-------- 1 file changed, 108 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e41bb9d6a491..47cd3ee2e262 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -91,6 +91,26 @@ static int i915_ttm_err_to_gem(int err) return err; } +static bool gpu_binds_iomem(struct ttm_resource *mem) +{ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static bool cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); static enum ttm_caching @@ -248,6 +268,35 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) obj->ttm.cached_io_st = NULL; } +static void +i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + static void i915_ttm_purge(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); @@ -263,8 +312,10 @@ static void i915_ttm_purge(struct drm_i915_gem_object *obj) /* TTM's purge interface. Note that we might be reentering. */ ret = ttm_bo_validate(bo, &place, &ctx); - if (!ret) { + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); i915_ttm_free_cached_io_st(obj); obj->mm.madv = __I915_MADV_PURGED; } @@ -347,12 +398,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - struct ttm_resource_manager *man = - ttm_manager_type(bo->bdev, res->mem_type); - if (man->use_tt) + if (!gpu_binds_iomem(res)) return i915_ttm_tt_get_st(bo->ttm); + /* + * If CPU mapping differs, we need to add the ttm_tt pages to + * the resulting st. Might make sense for GGTT. 
+ */ + GEM_WARN_ON(!cpu_maps_iomem(res)); return intel_region_ttm_resource_to_st(obj->mm.region, res); } @@ -367,23 +421,25 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct sg_table *src_st; struct i915_request *rq; + struct ttm_tt *ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; int ret; if (!i915->gt.migrate.context) return -EINVAL; - if (!bo->ttm || !ttm_tt_is_populated(bo->ttm)) { + dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); + if (!ttm || !ttm_tt_is_populated(ttm)) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (bo->ttm && - !(bo->ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) return 0; intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, - dst_st->sgl, I915_CACHE_NONE, - dst_mem->mem_type >= I915_PL_LMEM0, + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), 0, &rq); if (!ret && rq) { @@ -392,15 +448,16 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, } intel_engine_pm_put(i915->gt.migrate.context->engine); } else { - src_st = src_man->use_tt ? i915_ttm_tt_get_st(bo->ttm) : - obj->ttm.cached_io_st; + src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) : + obj->ttm.cached_io_st; + src_level = i915_ttm_cache_level(i915, bo->resource, ttm); intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_copy(i915->gt.migrate.context, - NULL, src_st->sgl, I915_CACHE_NONE, - bo->resource->mem_type >= I915_PL_LMEM0, - dst_st->sgl, I915_CACHE_NONE, - dst_mem->mem_type >= I915_PL_LMEM0, + NULL, src_st->sgl, src_level, + gpu_binds_iomem(bo->resource), + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), &rq); if (!ret && rq) { i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); @@ -420,8 +477,6 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct ttm_resource_manager *dst_man = ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct ttm_resource_manager *src_man = - ttm_manager_type(bo->bdev, bo->resource->mem_type); struct intel_memory_region *dst_reg, *src_reg; union { struct ttm_kmap_iter_tt tt; @@ -465,12 +520,12 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, ret = i915_ttm_accel_move(bo, dst_mem, dst_st); if (ret) { /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = dst_man->use_tt ? + dst_iter = !cpu_maps_iomem(dst_mem) ? ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, dst_st, dst_reg->region.start); - src_iter = src_man->use_tt ? + src_iter = !cpu_maps_iomem(bo->resource) ? ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, obj->ttm.cached_io_st, @@ -478,21 +533,24 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); } + /* Below dst_mem becomes bo->resource. 
*/ ttm_bo_move_sync_cleanup(bo, dst_mem); + i915_ttm_adjust_domains_after_move(obj); i915_ttm_free_cached_io_st(obj); - if (!dst_man->use_tt) { + if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { obj->ttm.cached_io_st = dst_st; obj->ttm.get_io_page.sg_pos = dst_st->sgl; obj->ttm.get_io_page.sg_idx = 0; } + i915_ttm_adjust_gem_after_move(obj); return 0; } static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { - if (mem->mem_type < I915_PL_LMEM0) + if (!cpu_maps_iomem(mem)) return 0; mem->bus.caching = ttm_write_combined; @@ -590,6 +648,16 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) return i915_ttm_err_to_gem(ret); } + i915_ttm_adjust_lru(obj); + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + } + /* Object either has a page vector or is an iomem object */ st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; if (IS_ERR(st)) @@ -597,8 +665,6 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); - i915_ttm_adjust_lru(obj); - return ret; } @@ -768,6 +834,10 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, { static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; enum ttm_bo_type bo_type; int ret; @@ -775,14 +845,13 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); i915_gem_object_init_memory_region(obj, mem); i915_gem_object_make_unshrinkable(obj); - obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; - obj->mem_flags |= I915_BO_FLAG_IOMEM; - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : ttm_bo_type_kernel; + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + /* * If this function fails, it will call the destructor, but * our caller still owns the object. So no freeing in the @@ -790,14 +859,17 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, * Similarly, in delayed_destroy, we can't call ttm_bo_put() * until successful initialization. */ - obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); - ret = ttm_bo_init(&i915->bdev, i915_gem_to_ttm(obj), size, - bo_type, &i915_sys_placement, - mem->min_page_size >> PAGE_SHIFT, - true, NULL, NULL, i915_ttm_bo_destroy); - if (!ret) - obj->ttm.created = true; - - /* i915 wants -ENXIO when out of memory region space. 
*/ - return i915_ttm_err_to_gem(ret); + ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + bo_type, &i915_sys_placement, + mem->min_page_size >> PAGE_SHIFT, + &ctx, NULL, NULL, i915_ttm_bo_destroy); + if (ret) + return i915_ttm_err_to_gem(ret); + + obj->ttm.created = true; + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + i915_gem_object_unlock(obj); + + return 0; } -- cgit v1.2.3 From 32b7cf51a441270c62ebaa146c9431e6f155d901 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 10:42:40 +0200 Subject: drm/i915/ttm: Use TTM for system memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For discrete, use TTM for both cached and WC system memory. That means we currently rely on the TTM memory accounting / shrinker. For cached system memory we should consider remaining shmem-backed, which can be implemented from our ttm_tt_populate callback. We can then also reuse our own very elaborate shrinker for that memory. If an object is evicted to a gem allowable region, we will now consider the object migrated, and we flip the gem region and move the object to a different region list. Since we are now changing gem regions, we can't any longer rely on the CONTIGUOUS flag being set based on the region min page size, so remove that flag update. If we want to reintroduce it, we need to put it in the mutable flags. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624084240.270219-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_region.c | 4 --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 52 ++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 3 -- drivers/gpu/drm/i915/intel_memory_region.c | 7 +++- drivers/gpu/drm/i915/intel_memory_region.h | 8 +++++ 6 files changed, 58 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index d1f1840540dd..4925563018b4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -13,11 +13,7 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, { obj->mm.region = intel_memory_region_get(mem); - if (obj->base.size <= mem->min_page_size) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - mutex_lock(&mem->objects.lock); - list_add(&obj->mm.region_link, &mem->objects.list); mutex_unlock(&mem->objects.lock); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 7aa1c95c7b7d..e9ac913b923a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -302,6 +302,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ struct pagevec pvec; struct page *page; + GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -560,6 +561,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, resource_size_t offset; int err; + GEM_WARN_ON(IS_DGFX(dev_priv)); obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 47cd3ee2e262..c39d982c4fa6 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -286,6 +286,26 @@ static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); @@ -615,13 +635,6 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) /* Move to the requested placement. */ i915_ttm_placement_from_obj(obj, &requested, busy, &placement); - /* - * For now we support LMEM only with TTM. - * TODO: Remove with system support - */ - GEM_BUG_ON(requested.mem_type < I915_PL_LMEM0 || - busy[0].mem_type < I915_PL_LMEM0); - /* First try only the requested placement. No eviction. */ real_num_busy = fetch_and_zero(&placement.num_busy_placement); ret = ttm_bo_validate(bo, &placement, &ctx); @@ -635,9 +648,6 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) ret == -EAGAIN) return ret; - /* TODO: Remove this when we support system as TTM. */ - real_num_busy = 1; - /* * If the initial attempt fails, allow all accepted placements, * evicting if necessary. 
@@ -873,3 +883,25 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, return 0; } + +static const struct intel_memory_region_ops ttm_system_region_ops = { + .init_object = __i915_gem_ttm_object_init, +}; + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance) +{ + struct intel_memory_region *mr; + + mr = intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + type, instance, + &ttm_system_region_ops); + if (IS_ERR(mr)) + return mr; + + intel_memory_region_set_name(mr, "system-ttm"); + return mr; +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 01e11fe38642..bfbfbae57573 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1751,9 +1751,6 @@ void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); -struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, - u16 type, u16 instance); - static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { /* diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index df59f884d37c..779eb2fa90b6 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -173,7 +173,12 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) instance = intel_region_map[i].instance; switch (type) { case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915, type, instance); + if (IS_DGFX(i915)) + mem = i915_gem_ttm_system_setup(i915, type, + instance); + else + mem = i915_gem_shmem_setup(i915, type, + instance); break; case INTEL_MEMORY_STOLEN_LOCAL: mem = i915_gem_stolen_lmem_setup(i915, type, instance); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 2be8433d373a..b1b9e461d53b 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -125,4 +125,12 @@ intel_memory_region_set_name(struct intel_memory_region *mem, int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance); +struct intel_memory_region * +i915_gem_shmem_setup(struct drm_i915_private *i915, + u16 type, u16 instance); + #endif -- cgit v1.2.3 From d3f3baa3562a5d09f3e87f5fdf84952112807753 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 24 Jun 2021 13:29:14 +0200 Subject: drm/i915: Reinstate the mmap ioctl for some platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reinstate the mmap ioctl for all current integrated platforms. The intention was really to have it disabled for discrete graphics where we enforce a single mmap mode. This was reported to break ADL-P with the media stack, which was not the intention. Although longer term we do still plan to sunset this ioctl even for integrated, in favour of using mmap_offset instead. 
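[Illustrative userspace sketch, not part of the patch; the helper name, include path and error handling are simplified assumptions.] On discrete parts the intended replacement is the mmap_offset ioctl followed by mmap() of the returned fake offset:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *map_bo_wc(int drm_fd, __u32 handle, size_t size)
{
	struct drm_i915_gem_mmap_offset arg = {
		.handle = handle,
		.flags = I915_MMAP_OFFSET_WC,
	};
	void *ptr;

	/* Ask the kernel for the fake mmap offset of this GEM handle */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
		return NULL;

	/* Map the object write-combined through the DRM fd */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   drm_fd, arg.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}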
Fixes: 35cbd91eb541 ("drm/i915: Disable mmap ioctl for gen12+") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Daniel Vetter Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210624112914.311984-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 6497a2dbdab9..a90f796e85c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -62,10 +62,11 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; unsigned long addr; - /* mmap ioctl is disallowed for all platforms after TGL-LP. This also - * covers all platforms with local memory. + /* + * mmap ioctl is disallowed for all discrete platforms, + * and for all platforms with GRAPHICS_VER > 12. */ - if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915)) + if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12) return -EOPNOTSUPP; if (args->flags & ~(I915_MMAP_WC)) -- cgit v1.2.3 From 53fe9cf2dafe2b0382a4e682e4eebe0a442dcb5a Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 24 Jun 2021 16:52:50 +0530 Subject: drm/i915/selftest: Extend ctx_timestamp ICL workaround to GEN11 EHL and JSL are also observing requirement for 80ns interval for CTX_TIMESTAMP thus extending it to GEN11. Changes since V1: - IS_GEN replaced by GRAPHICS_VER - Tvrtko Acked-by: Tvrtko Ursulin Signed-off-by: Tejas Upadhyay Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210624112250.895410-1-tejaskumarx.surendrakumar.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 72cca3f0da21..75569666105d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -173,8 +173,8 @@ static int __live_engine_timestamps(struct intel_engine_cs *engine) d_ctx = trifilter(s_ctx); d_ctx *= engine->gt->clock_frequency; - if (IS_ICELAKE(engine->i915)) - d_ring *= 12500000; /* Fixed 80ns for icl ctx timestamp? */ + if (GRAPHICS_VER(engine->i915) == 11) + d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? */ else d_ring *= engine->gt->clock_frequency; -- cgit v1.2.3 From b6e913e19c54eddd6a4d637969f5c079effb74c6 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 29 Jun 2021 17:12:01 +0200 Subject: drm/i915/gem: Implement object migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce an interface to migrate objects between regions. This is primarily intended to migrate objects to LMEM for display and to SYSTEM for dma-buf, but might be reused in one form or another for performance-based migration. v2: - Verify that the memory region given as an id really exists. (Reported by Matthew Auld) - Call i915_gem_object_{init,release}_memory_region() when switching region to handle also switching region lists. (Reported by Matthew Auld) v3: - Fix i915_gem_object_can_migrate() to return true if object is already in the correct region, even if the object ops doesn't have a migrate() callback. - Update typo in commit message. - Fix kerneldoc of i915_gem_object_wait_migration(). 
v4: - Improve documentation (Suggested by Mattew Auld and Michael Ruhl) - Always assume TTM migration hits a TTM move and unsets the pages through move_notify. (Reported by Matthew Auld) - Add a dma_fence_might_wait() annotation to i915_gem_object_wait_migration() (Suggested by Daniel Vetter) v5: - Re-add might_sleep() instead of __dma_fence_might_wait(), Sent v4 with the wrong version, didn't compile and __dma_fence_might_wait() is not exported. - Added an R-B. Reported-by: kernel test robot Signed-off-by: Thomas Hellström Reviewed-by: Michael J. Ruhl Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210629151203.209465-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 112 +++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 12 +++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 77 +++++++++++++--- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 19 ++++ 5 files changed, 217 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 07e8ff9a8aae..225b77fb4314 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -513,6 +513,118 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) return obj->mem_flags & I915_BO_FLAG_IOMEM; } +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate objects and might be slightly less permissive + * than i915_gem_object_migrate() when it comes to objects with the + * I915_BO_ALLOC_USER flag set. + * + * Return: true if migration is possible, false otherwise. + */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + mr = i915->mm.regions[id]; + if (!mr) + return false; + + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!obj->ops->migrate) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. 
+ * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * This function is a bit more permissive than i915_gem_object_can_migrate() + * to allow for migrating objects where the caller knows exactly what is + * happening. For example within selftests. More specifically this + * function allows migrating I915_BO_ALLOC_USER objects to regions + * that are not in the list of allowable regions. + * + * Note: the @ww parameter is not used yet, but included to make sure + * callers put some effort into obtaining a valid ww ctx if one is + * available. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. + */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + GEM_BUG_ON(!mr); + + if (obj->mm.region == mr) + return 0; + + if (!i915_gem_object_evictable(obj)) + return -EBUSY; + + if (!obj->ops->migrate) + return -EOPNOTSUPP; + + return obj->ops->migrate(obj, mr); +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index ea3224a480c4..8cbd7a5334e2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -17,6 +17,8 @@ #include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. Let's document @@ -597,6 +599,16 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id); + +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id); + +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 441f913c87e6..ef3de2ae9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -18,6 +18,7 @@ struct drm_i915_gem_object; struct intel_fronbuffer; +struct intel_memory_region; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -77,6 +78,14 @@ struct drm_i915_gem_object_ops { * delayed_free - Override the default delayed free implementation */ void (*delayed_free)(struct drm_i915_gem_object *obj); + + /** + * migrate - Migrate object to a different region either for + * pinning or for as long as the object lock is held. 
+ */ + int (*migrate)(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr); + void (*release)(struct drm_i915_gem_object *obj); const struct vm_operations_struct *mmap_ops; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c39d982c4fa6..521ab740001a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -617,7 +617,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) return &i915_ttm_bo_driver; } -static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); struct ttm_operation_ctx ctx = { @@ -625,19 +626,12 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) .no_wait_gpu = false, }; struct sg_table *st; - struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; - struct ttm_placement placement; int real_num_busy; int ret; - GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); - - /* Move to the requested placement. */ - i915_ttm_placement_from_obj(obj, &requested, busy, &placement); - /* First try only the requested placement. No eviction. */ - real_num_busy = fetch_and_zero(&placement.num_busy_placement); - ret = ttm_bo_validate(bo, &placement, &ctx); + real_num_busy = fetch_and_zero(&placement->num_busy_placement); + ret = ttm_bo_validate(bo, placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -652,8 +646,8 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement.num_busy_placement = real_num_busy; - ret = ttm_bo_validate(bo, &placement, &ctx); + placement->num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); } @@ -668,6 +662,7 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) i915_ttm_adjust_gem_after_move(obj); } + GEM_WARN_ON(obj->mm.pages); /* Object either has a page vector or is an iomem object */ st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; if (IS_ERR(st)) @@ -678,6 +673,63 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) return ret; } +static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +{ + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; + + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + + /* Move to the requested placement. */ + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + return __i915_ttm_get_pages(obj, &placement); +} + +/** + * DOC: Migration vs eviction + * + * GEM migration may not be the same as TTM migration / eviction. If + * the TTM core decides to evict an object it may be evicted to a + * TTM memory type that is not in the object's allowable GEM regions, or + * in fact theoretically to a TTM memory type that doesn't correspond to + * a GEM memory region. In that case the object's GEM region is not + * updated, and the data is migrated back to the GEM region at + * get_pages time. TTM may however set up CPU ptes to the object even + * when it is evicted. + * Gem forced migration using the i915_ttm_migrate() op, is allowed even + * to regions that are not in the object's list of allowable placements. 
+ */ +static int i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr) +{ + struct ttm_place requested; + struct ttm_placement placement; + int ret; + + i915_ttm_place_from_region(mr, &requested, obj->flags); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + ret = __i915_ttm_get_pages(obj, &placement); + if (ret) + return ret; + + /* + * Reinitialize the region bindings. This is primarily + * required for objects where the new region is not in + * its allowable placements. + */ + if (obj->mm.region != mr) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + } + + return 0; +} + static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct sg_table *st) { @@ -814,6 +866,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .truncate = i915_ttm_purge, .adjust_lru = i915_ttm_adjust_lru, .delayed_free = i915_ttm_delayed_free, + .migrate = i915_ttm_migrate, .mmap_offset = i915_ttm_mmap_offset, .mmap_ops = &vm_ops_ttm, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1070d3afdce7..f909aaa09d9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -290,3 +290,22 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); return ret; } + +/** + * i915_gem_object_wait_migration - Sync an accelerated migration operation + * @obj: The migrating object. + * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE. + * + * Wait for any pending async migration operation on the object, + * whether it's explicitly (i915_gem_object_migrate()) or implicitly + * (swapin, initial clearing) initiated. + * + * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting. + */ +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + might_sleep(); + /* NOP for now. */ + return 0; +} -- cgit v1.2.3 From bf74a18ca8569ff1ac89501026a8218753f757f7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 29 Jun 2021 17:12:02 +0200 Subject: drm/i915/gem: Introduce a selftest for the gem object migrate functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A selftest for the gem object migrate functionality. Slightly adapted from the original by Matthew to the new interface and new fill blit code. v4: - Initialize buffers and check contents after migration (Suggested by Matthew Auld) - Perform async migration (if implemented) in the igt_lmem_pages_migrate test - Test also migration to the current region. Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Signed-off-by: Matthew Auld Reviewed-by: Michael J. 
Ruhl #v3 Link: https://patchwork.freedesktop.org/patch/msgid/20210629151203.209465-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 258 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 260 insertions(+) create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 225b77fb4314..547cc9dad90d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -665,6 +665,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" +#include "selftests/i915_gem_migrate.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c new file mode 100644 index 000000000000..ced6e3a814a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2021 Intel Corporation + */ + +#include "gt/intel_migrate.h" + +static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, + bool fill) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int i, count = obj->base.size / sizeof(u32); + enum i915_map_type map_type = + i915_coherent_map_type(i915, obj, false); + u32 *cur; + int err = 0; + + assert_object_held(obj); + cur = i915_gem_object_pin_map(obj, map_type); + if (IS_ERR(cur)) + return PTR_ERR(cur); + + if (fill) + for (i = 0; i < count; ++i) + *cur++ = i; + else + for (i = 0; i < count; ++i) + if (*cur++ != i) { + pr_err("Object content mismatch at location %d of %d\n", i, count); + err = -EINVAL; + break; + } + + i915_gem_object_unpin_map(obj); + + return err; +} + +static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, + enum intel_region_id dst) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_memory_region *src_mr = i915->mm.regions[src]; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + GEM_BUG_ON(!src_mr); + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + + if (!i915_gem_object_can_migrate(obj, dst)) { + err = -EINVAL; + continue; + } + + err = i915_gem_object_migrate(obj, &ww, dst); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, src)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + err = i915_gem_object_wait_migration(obj, true); + if (err) + continue; + + err = igt_fill_check_buffer(obj, false); + } + i915_gem_object_put(obj); + + return err; +} + +static int igt_smem_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM); +} + +static int igt_lmem_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM); +} + +static int igt_same_create_migrate(void *arg) +{ + return 
igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM); +} + +static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_lock(obj, ww); + if (err) + return err; + + if (i915_gem_object_is_lmem(obj)) { + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) { + pr_err("object can't migrate to smem.\n"); + return -EINVAL; + } + + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); + if (err) { + pr_err("Object failed migration to smem\n"); + if (err) + return err; + } + + if (i915_gem_object_is_lmem(obj)) { + pr_err("object still backed by lmem\n"); + err = -EINVAL; + } + + if (!i915_gem_object_has_struct_page(obj)) { + pr_err("object not backed by struct page\n"); + err = -EINVAL; + } + + } else { + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { + pr_err("object can't migrate to lmem.\n"); + return -EINVAL; + } + + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM); + if (err) { + pr_err("Object failed migration to lmem\n"); + if (err) + return err; + } + + if (i915_gem_object_has_struct_page(obj)) { + pr_err("object still backed by struct page\n"); + err = -EINVAL; + } + + if (!i915_gem_object_is_lmem(obj)) { + pr_err("object not backed by lmem\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_lmem_pages_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + struct i915_request *rq; + int err; + int i; + + /* From LMEM to shmem and back again */ + + obj = i915_gem_object_create_lmem(i915, SZ_2M, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + /* Initial GPU fill, sync, CPU initialization. */ + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = ____i915_gem_object_get_pages(obj); + if (err) + continue; + + err = intel_migrate_clear(>->migrate, &ww, NULL, + obj->mm.pages->sgl, obj->cache_level, + i915_gem_object_is_lmem(obj), + 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } + if (err) + continue; + + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, + 5 * HZ); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + } + if (err) + goto out_put; + + /* + * Migrate to and from smem without explicitly syncing. + * Finalize with data in smem for fast readout. + */ + for (i = 1; i <= 5; ++i) { + for_i915_gem_ww(&ww, err, true) + err = lmem_pages_migrate_one(&ww, obj); + if (err) + goto out_put; + } + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + + /* Finally sync migration and check content. 
*/ + err = i915_gem_object_wait_migration(obj, true); + if (err) + goto out_unlock; + + err = igt_fill_check_buffer(obj, false); + +out_unlock: + i915_gem_object_unlock(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_smem_create_migrate), + SUBTEST(igt_lmem_create_migrate), + SUBTEST(igt_same_create_migrate), + SUBTEST(igt_lmem_pages_migrate), + }; + + if (!HAS_LMEM(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a68197cf1044..e2fd1b61af71 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -40,6 +40,7 @@ selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) +selftest(gem_migrate, i915_gem_migrate_live_selftests) selftest(reset, intel_reset_live_selftests) selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) -- cgit v1.2.3 From e11b7b6e574d57b99952213b5388db66445b18f2 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 29 Jun 2021 17:12:03 +0200 Subject: drm/i915/display: Migrate objects to LMEM if possible for display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Objects intended to be used as display framebuffers must reside in LMEM for discrete. If they happen to not do that, migrate them to LMEM before pinning. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210629151203.209465-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 5 ++++- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 21 --------------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 -- 4 files changed, 5 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 6be1b31af07b..5ef2b38bec83 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1331,6 +1331,9 @@ retry: ret = i915_gem_object_lock(obj, &ww); if (!ret && phys_cursor) ret = i915_gem_object_attach_phys(obj, alignment); + else if (!ret && HAS_LMEM(dev_priv)) + ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + /* TODO: Do we need to sync when migration becomes async? 
*/ if (!ret) ret = i915_gem_object_pin_pages(obj); if (ret) @@ -11771,7 +11774,7 @@ intel_user_framebuffer_create(struct drm_device *dev, /* object is backed with LMEM for discrete */ i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_validates_to_lmem(obj)) { + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { /* object is "remote", not in local memory */ i915_gem_object_put(obj); return ERR_PTR(-EREMOTE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..7d1400b13429 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -375,7 +375,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - /* Frame buffer must be in LMEM (no migration yet) */ + /* Frame buffer must be in LMEM */ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 41d5182cd367..be1d122574af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -23,27 +23,6 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } -/** - * i915_gem_object_validates_to_lmem - Whether the object is resident in - * lmem when pages are present. - * @obj: The object to check. - * - * Migratable objects residency may change from under us if the object is - * not pinned or locked. This function is intended to be used to check whether - * the object can only reside in lmem when pages are present. - * - * Return: Whether the object is always resident in lmem when pages are - * present. - */ -bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj) -{ - struct intel_memory_region *mr = READ_ONCE(obj->mm.region); - - return !i915_gem_object_migratable(obj) && - mr && (mr->type == INTEL_MEMORY_LOCAL || - mr->type == INTEL_MEMORY_STOLEN_LOCAL); -} - /** * i915_gem_object_is_lmem - Whether the object is resident in * lmem diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 8cbd7a5334e2..d423d8cac4f2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -597,8 +597,6 @@ bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); -bool i915_gem_object_validates_to_lmem(struct drm_i915_gem_object *obj); - int i915_gem_object_migrate(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, enum intel_region_id id); -- cgit v1.2.3 From d22632c83b948e4f7a3d4202a884be2409098cc2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Jun 2021 11:38:23 +0100 Subject: drm/i915: support forcing the page size with lmem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some specialised objects we might need something larger than the regions min_page_size due to some hw restriction, and slightly more hairy is needing something smaller with the guarantee that such objects will never be inserted into any GTT, which is the case for the paging structures. This also fixes how we setup the BO page_alignment, if we later migrate the object somewhere else. For example if the placements are {SMEM, LMEM}, then we might get this wrong. 
Pushing the min_page_size behaviour into the manager should fix this. v2(Thomas): push the default page size behaviour into buddy_man, and let the user override it with the page-alignment, which looks cleaner v3: rebase on ttm sys changes Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210625103824.558481-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 33 +++++++++++- drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 5 ++ drivers/gpu/drm/i915/gem/i915_gem_region.c | 13 ++++- drivers/gpu/drm/i915/gem/i915_gem_region.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 6 ++- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1 + drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 3 +- .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 +-- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 14 ++++- drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 2 +- drivers/gpu/drm/i915/intel_memory_region.h | 1 + drivers/gpu/drm/i915/intel_region_ttm.c | 4 +- .../gpu/drm/i915/selftests/intel_memory_region.c | 63 ++++++++++++++++++++-- drivers/gpu/drm/i915/selftests/mock_region.c | 1 + 18 files changed, 144 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 93bf63bbaff1..51f92e4b1a69 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -90,7 +90,7 @@ i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) */ flags = I915_BO_ALLOC_USER; - ret = mr->ops->init_object(mr, obj, size, flags); + ret = mr->ops->init_object(mr, obj, size, 0, flags); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index be1d122574af..eb345305dc52 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -72,11 +72,42 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the + * minimum page size for the backing pages. + * @i915: The i915 instance. + * @size: The size in bytes for the object. Note that we need to round the size + * up depending on the @page_size. The final object size can be fished out from + * the drm GEM object. + * @page_size: The requested minimum page size in bytes for this object. This is + * useful if we need something bigger than the regions min_page_size due to some + * hw restriction, or in some very specialised cases where it needs to be + * smaller, where the internal fragmentation cost is too great when rounding up + * the object size. + * @flags: The optional BO allocation flags. + * + * Note that this interface assumes you know what you are doing when forcing the + * @page_size. If this is smaller than the regions min_page_size then it can + * never be inserted into any GTT, otherwise it might lead to undefined + * behaviour. + * + * Return: The object pointer, which might be an ERR_PTR in the case of failure. 
+ */ +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, page_size, flags); +} + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], - size, flags); + size, 0, flags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 27a611deba47..4ee81fc66302 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -23,6 +23,11 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 4925563018b4..1f557b2178ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -32,9 +32,11 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_gem_object *obj; + resource_size_t default_page_size; int err; /* @@ -48,7 +50,14 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!mem) return ERR_PTR(-ENODEV); - size = round_up(size, mem->min_page_size); + default_page_size = mem->min_page_size; + if (page_size) + default_page_size = page_size; + + GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); + GEM_BUG_ON(default_page_size < PAGE_SIZE); + + size = round_up(size, default_page_size); GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); @@ -60,7 +69,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!obj) return ERR_PTR(-ENOMEM); - err = mem->ops->init_object(mem, obj, size, flags); + err = mem->ops->init_object(mem, obj, size, page_size, flags); if (err) goto err_object_free; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 84fcb3297400..1008e580a89a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -19,6 +19,7 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index e9ac913b923a..6a04cce188fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -490,6 +490,7 @@ static int __create_shmem(struct drm_i915_private *i915, static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -548,7 
+549,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], - size, 0); + size, 0, 0); } /* Allocate a new GEM object and fill it with the supplied data */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b0c3a7dc60d1..90708de27684 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -670,6 +670,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_private *i915 = mem->i915; @@ -708,7 +709,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.stolen_region, size, 0); + return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0); } static int init_stolen_smem(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 521ab740001a..6589411396d3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -893,6 +893,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -915,6 +916,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + /* Forcing the page size is kernel internal only */ + GEM_BUG_ON(page_size && obj->mm.n_placements); + /* * If this function fails, it will call the destructor, but * our caller still owns the object. 
So no freeing in the @@ -924,7 +928,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, */ ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, bo_type, &i915_sys_placement, - mem->min_page_size >> PAGE_SHIFT, + page_size >> PAGE_SHIFT, &ctx, NULL, NULL, i915_ttm_bo_destroy); if (ret) return i915_ttm_err_to_gem(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index b8d3dcbb50df..40927f67b6d9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -44,5 +44,6 @@ i915_ttm_to_gem(struct ttm_buffer_object *bo) int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index ccc67ed1a84b..a094f3ce1a90 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -496,7 +496,8 @@ static int igt_mock_memory_region_huge_pages(void *arg) int i; for (i = 0; i < ARRAY_SIZE(flags); ++i) { - obj = i915_gem_object_create_region(mem, page_size, + obj = i915_gem_object_create_region(mem, + page_size, page_size, flags[i]); if (IS_ERR(obj)) { err = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index ced6e3a814a2..0b7144d2991c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -48,7 +48,7 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, GEM_BUG_ON(!src_mr); /* Switch object backing-store on create */ - obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0); + obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 607b7d2d4c29..1da8bd675e54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -958,7 +958,7 @@ static int igt_mmap(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, sizes[i], I915_BO_ALLOC_USER); + obj = i915_gem_object_create_region(mr, sizes[i], 0, I915_BO_ALLOC_USER); if (obj == ERR_PTR(-ENODEV)) continue; @@ -1084,7 +1084,7 @@ static int igt_mmap_access(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, I915_BO_ALLOC_USER); + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER); if (obj == ERR_PTR(-ENODEV)) continue; @@ -1229,7 +1229,7 @@ static int igt_mmap_gpu(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, I915_BO_ALLOC_USER); + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER); if (obj == ERR_PTR(-ENODEV)) continue; @@ -1384,7 +1384,7 @@ static int igt_mmap_revoke(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, I915_BO_ALLOC_USER); + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER); if (obj == ERR_PTR(-ENODEV)) continue; diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index fc7ad5c035b8..6877362f6b85 100644 --- 
a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -18,6 +18,7 @@ struct i915_ttm_buddy_manager { struct i915_buddy_mm mm; struct list_head reserved; struct mutex lock; + u64 default_page_size; }; static struct i915_ttm_buddy_manager * @@ -53,7 +54,10 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, GEM_BUG_ON(!bman_res->base.num_pages); size = bman_res->base.num_pages << PAGE_SHIFT; - min_page_size = bo->page_alignment << PAGE_SHIFT; + min_page_size = bman->default_page_size; + if (bo->page_alignment) + min_page_size = bo->page_alignment << PAGE_SHIFT; + GEM_BUG_ON(min_page_size < mm->chunk_size); min_order = ilog2(min_page_size) - ilog2(mm->chunk_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { @@ -134,6 +138,9 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { * @type: Memory type we want to manage * @use_tt: Set use_tt for the manager * @size: The size in bytes to manage + * @default_page_size: The default minimum page size in bytes for allocations, + * this must be at least as large as @chunk_size, and can be overridden by + * setting the BO page_alignment, to be larger or smaller as needed. * @chunk_size: The minimum page size in bytes for our allocations i.e * order-zero * @@ -154,7 +161,8 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { */ int i915_ttm_buddy_man_init(struct ttm_device *bdev, unsigned int type, bool use_tt, - u64 size, u64 chunk_size) + u64 size, u64 default_page_size, + u64 chunk_size) { struct ttm_resource_manager *man; struct i915_ttm_buddy_manager *bman; @@ -170,6 +178,8 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev, mutex_init(&bman->lock); INIT_LIST_HEAD(&bman->reserved); + GEM_BUG_ON(default_page_size < chunk_size); + bman->default_page_size = default_page_size; man = &bman->manager; man->use_tt = use_tt; diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h index 26026213e20a..0722d33f3e14 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -46,7 +46,7 @@ to_ttm_buddy_resource(struct ttm_resource *res) int i915_ttm_buddy_man_init(struct ttm_device *bdev, unsigned type, bool use_tt, - u64 size, u64 chunk_size); + u64 size, u64 default_page_size, u64 chunk_size); int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index b1b9e461d53b..1f2b96efa69d 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -55,6 +55,7 @@ struct intel_memory_region_ops { int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags); }; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 4cd10f364e84..98c7339bf8ba 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -86,7 +86,8 @@ int intel_region_ttm_init(struct intel_memory_region *mem) int ret; ret = i915_ttm_buddy_man_init(bdev, mem_type, false, - resource_size(&mem->region), PAGE_SIZE); + resource_size(&mem->region), + mem->min_page_size, PAGE_SIZE); if (ret) return ret; @@ -167,7 +168,6 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, int ret; mock_bo.base.size = size; - 
mock_bo.page_alignment = mem->min_page_size >> PAGE_SHIFT; place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ecc3b9e6c22b..1aaccb9841a0 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -68,7 +68,7 @@ static int igt_mock_fill(void *arg) resource_size_t size = page_num * page_size; struct drm_i915_gem_object *obj; - obj = i915_gem_object_create_region(mem, size, 0); + obj = i915_gem_object_create_region(mem, size, 0, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); break; @@ -110,7 +110,7 @@ igt_object_create(struct intel_memory_region *mem, struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mem, size, flags); + obj = i915_gem_object_create_region(mem, size, 0, flags); if (IS_ERR(obj)) return obj; @@ -647,6 +647,62 @@ out_put: return err; } +static int igt_lmem_create_with_ps(void *arg) +{ + struct drm_i915_private *i915 = arg; + int err = 0; + u32 ps; + + for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) { + struct drm_i915_gem_object *obj; + dma_addr_t daddr; + + obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENXIO || err == -E2BIG) { + pr_info("%s not enough lmem for ps(%u) err=%d\n", + __func__, ps, err); + err = 0; + } + + break; + } + + if (obj->base.size != ps) { + pr_err("%s size(%zu) != ps(%u)\n", + __func__, obj->base.size, ps); + err = -EINVAL; + goto out_put; + } + + i915_gem_object_lock(obj, NULL); + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + daddr = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s daddr(%pa) not aligned with ps(%u)\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_unpin; + } + +out_unpin: + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); +out_put: + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + + if (err) + break; + } + + return err; +} + static int igt_lmem_create_cleared_cpu(void *arg) { struct drm_i915_private *i915 = arg; @@ -932,7 +988,7 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, struct drm_i915_gem_object *obj; void *addr; - obj = i915_gem_object_create_region(mr, size, 0); + obj = i915_gem_object_create_region(mr, size, 0, 0); if (IS_ERR(obj)) { if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */ return ERR_PTR(-ENODEV); @@ -1149,6 +1205,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), + SUBTEST(igt_lmem_create_with_ps), SUBTEST(igt_lmem_create_cleared_cpu), SUBTEST(igt_lmem_write_cpu), SUBTEST(igt_lmem_write_gpu), diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index fa786dede608..efa86dffe3c6 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -63,6 +63,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = { static int mock_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; -- cgit v1.2.3 From 32334c9b1fd78ad661582c55b15d263a5d6d157d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 25 Jun 2021 11:38:24 +0100 Subject: 
drm/i915/gtt: ignore min_page_size for paging structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The min_page_size is only needed for pages inserted into the GTT, and for our paging structures we only need at most 4K bytes, so simply ignore the min_page_size restrictions here, otherwise we might see some severe overallocation on some devices. v2(Thomas): add some commentary Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210625103824.558481-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 084ea65d59c0..f7e0352edb62 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -16,7 +16,19 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) { struct drm_i915_gem_object *obj; - obj = i915_gem_object_create_lmem(vm->i915, sz, 0); + /* + * To avoid severe over-allocation when dealing with min_page_size + * restrictions, we override that behaviour here by allowing an object + * size and page layout which can be smaller. In practice this should be + * totally fine, since GTT paging structures are not typically inserted + * into the GTT. + * + * Note that we also hit this path for the scratch page, and for this + * case it might need to be 64K, but that should work fine here since we + * used the passed in size for the page size, which should ensure it + * also has the same alignment. + */ + obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0); /* * Ensure all paging structures for this vm share the same dma-resv * object underneath, with the idea that one object_lock() will lock -- cgit v1.2.3 From 27e4b467d94e216b365da388358c9407af818662 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 1 Jul 2021 19:36:17 +0200 Subject: drm/i915: Use the correct IRQ during resume The code in xcs_resume() probably didn't work as intended. It uses struct drm_device.irq, which is allocated to 0, but never initialized by i915 to the device's interrupt number. Change all calls to synchronize_hardirq() to intel_synchronize_irq(), which uses the correct interrupt. _hardirq() functions are not needed in this context. 
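As an illustrative aside (a condensed before/after drawn from the hunks below, not a new change; i915 never fills in drm_device.irq, so the old call waited on the wrong interrupt line):

	/* before: drm.irq is still 0 here, so this synchronizes against the wrong IRQ */
	synchronize_hardirq(engine->i915->drm.irq);

	/* after: resolve the real interrupt line from the PCI device */
	intel_synchronize_hardirq(engine->i915);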
v5: * go back to _hardirq() after PCI probe reported wrong context; add rsp comment v4: * switch everything to intel_synchronize_irq() (Daniel) v3: * also use intel_synchronize_hardirq() at another callsite v2: * wrap irq code in intel_synchronize_hardirq() (Ville) Signed-off-by: Thomas Zimmermann Fixes: 536f77b1caa0 ("drm/i915/gt: Call stop_ring() from ring resume, again") Cc: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Rodrigo Vivi Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: Lucas De Marchi Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210701173618.10718-2-tzimmermann@suse.de --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 7 +++++-- drivers/gpu/drm/i915/i915_irq.c | 5 +++++ drivers/gpu/drm/i915/i915_irq.h | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 88694822716a..5ca3d1664335 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1229,7 +1229,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Waiting to drain ELSP? */ - synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq); + intel_synchronize_hardirq(engine->i915); intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 5d42a12ef3d6..5c4d204d07cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -184,8 +184,11 @@ static int xcs_resume(struct intel_engine_cs *engine) ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); - /* Double check the ring is empty & disabled before we resume */ - synchronize_hardirq(engine->i915->drm.irq); + /* + * Double check the ring is empty & disabled before we resume. Called + * from atomic context during PCI probe, so _hardirq(). 
+ */ + intel_synchronize_hardirq(engine->i915); if (!stop_ring(engine)) goto err; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a11bdb667241..7d13d2147054 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4575,3 +4575,8 @@ void intel_synchronize_irq(struct drm_i915_private *i915) { synchronize_irq(to_pci_dev(i915->drm.dev)->irq); } + +void intel_synchronize_hardirq(struct drm_i915_private *i915) +{ + synchronize_hardirq(to_pci_dev(i915->drm.dev)->irq); +} diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index db34d5dbe402..e43b6734f21b 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -94,6 +94,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); bool intel_irqs_enabled(struct drm_i915_private *dev_priv); void intel_synchronize_irq(struct drm_i915_private *i915); +void intel_synchronize_hardirq(struct drm_i915_private *i915); int intel_get_crtc_scanline(struct intel_crtc *crtc); void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 91b96f0008a2d66d76b525556e4818f5a4a089e4 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 1 Jul 2021 19:36:18 +0200 Subject: drm/i915: Drop all references to DRM IRQ midlayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all references to DRM's IRQ midlayer. i915 uses Linux' interrupt functions directly. v2: * also remove an outdated comment * move IRQ fix into separate patch * update Fixes tag (Daniel) Signed-off-by: Thomas Zimmermann Fixes: b318b82455bd ("drm/i915: Nuke drm_driver irq vfuncs") Cc: Ville Syrjälä Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210701173618.10718-3-tzimmermann@suse.de --- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_irq.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 850b499c71c8..73de45472f60 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7d13d2147054..c03943198089 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -33,7 +33,6 @@ #include #include -#include #include "display/intel_de.h" #include "display/intel_display_types.h" @@ -4564,10 +4563,6 @@ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) bool intel_irqs_enabled(struct drm_i915_private *dev_priv) { - /* - * We only use drm_irq_uninstall() at unload and VT switch, so - * this is the only thing we need to check. - */ return dev_priv->runtime_pm.irqs_enabled; } -- cgit v1.2.3 From 7e8376f1d1272d1d12ea0b841ae05e21a9a574cc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 2 Jul 2021 22:17:08 +0200 Subject: drm/i915: Improve debug Kconfig texts a bit We're not consistently recommending these for developers only. 
I stumbled over this due to DRM_I915_LOW_LEVEL_TRACEPOINTS, which was added in commit 354d036fcf70654cff2e2cbdda54a835d219b9d2 Author: Tvrtko Ursulin Date: Tue Feb 21 11:01:42 2017 +0000 drm/i915/tracepoints: Add request submit and execute tracepoints to "alleviate the performance impact concerns." Which is nonsense. Tvrtko and Joonas pointed out on irc that the real (but undocumented reason) was stable abi concerns for tracepoints, see https://lwn.net/Articles/705270/ and the specific change that was blocked around tracepoints: https://lwn.net/Articles/442113/ Anyway to make it a notch clearer why we have this Kconfig option consistly add the "Recommended for driver developers only." to it and all the other debug options we have. Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Reviewed-by: Joonas Lahtinen Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210702201708.2075793-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/Kconfig.debug | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 72a38f28393f..47e845353ffa 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -207,6 +207,8 @@ config DRM_I915_LOW_LEVEL_TRACEPOINTS This provides the ability to precisely monitor engine utilisation and also analyze the request dependency resolving timeline. + Recommended for driver developers only. + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE @@ -220,6 +222,8 @@ config DRM_I915_DEBUG_VBLANK_EVADE is exceeded, even if there isn't an actual risk of missing the vblank. + Recommended for driver developers only. + If in doubt, say "N". config DRM_I915_DEBUG_RUNTIME_PM @@ -232,4 +236,6 @@ config DRM_I915_DEBUG_RUNTIME_PM runtime PM functionality. This may introduce overhead during driver loading, suspend and resume operations. + Recommended for driver developers only. + If in doubt, say "N" -- cgit v1.2.3 From 0c1145e05ce1c38d81f332263186092294ec7d90 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 2 Jul 2021 11:46:41 +0100 Subject: drm/i915/selftests: fix smatch warning in igt_check_blocks The block here can't be NULL, especially since we already dereferenced it earlier, so remove the redundant check. igt_check_blocks() warn: variable dereferenced before check 'block' (see line 126) Reported-by: Dan Carpenter Signed-off-by: Matthew Auld Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210702104642.1189978-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/selftests/i915_buddy.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c index f0f5c4df8dbc..d61ec9c951bf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -166,10 +166,8 @@ static int igt_check_blocks(struct i915_buddy_mm *mm, igt_dump_block(mm, prev); } - if (block) { - pr_err("bad block, dump:\n"); - igt_dump_block(mm, block); - } + pr_err("bad block, dump:\n"); + igt_dump_block(mm, block); return err; } -- cgit v1.2.3 From b23228f287bc7dd970fed85f15ea4630b93fc8a0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 2 Jul 2021 11:46:42 +0100 Subject: drm/i915/selftests: fix smatch warning in mock_reserve If mock_region_create fails then mem will be an error pointer. 
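As an illustrative aside (the old unwind order, reconstructed from the diff below): jumping to out_close on a failed region create hands that ERR_PTR straight to code that dereferences it:

out_close:
	kfree(order);
	close_objects(mem, &objects);	/* 'mem' may be an ERR_PTR here */
	intel_memory_region_put(mem);
	return err;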
Instead we just need to use the correct ordering for the onion unwind. igt_mock_reserve() error: 'mem' dereferencing possible ERR_PTR() Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Matthew Auld Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210702104642.1189978-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/selftests/intel_memory_region.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 1aaccb9841a0..418caae84759 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -173,7 +173,7 @@ static int igt_mock_reserve(void *arg) if (IS_ERR(mem)) { pr_err("failed to create memory region\n"); err = PTR_ERR(mem); - goto out_close; + goto out_free_order; } /* Reserve a bunch of ranges within the region */ @@ -224,9 +224,10 @@ static int igt_mock_reserve(void *arg) } out_close: - kfree(order); close_objects(mem, &objects); intel_memory_region_put(mem); +out_free_order: + kfree(order); return err; } -- cgit v1.2.3 From 7c517f83fa8c35a03a13d7af36bd13fb991eae06 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 7 Jul 2021 11:13:23 -0700 Subject: drm/i915/gt: finish INTEL_GEN and friends conversion Commit c816723b6b8a ("drm/i915/gt: replace IS_GEN and friends with GRAPHICS_VER") converted INTEL_GEN and friends to the new version check macros. Meanwhile, some changes sneaked in to use INTEL_GEN. Remove the last users so we can remove the macros. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210707181325.2130821-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 23c59ce66cee..14afa1974ea5 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -277,7 +277,7 @@ static int emit_pte(struct i915_request *rq, u32 *hdr, *cs; int pkt; - GEM_BUG_ON(INTEL_GEN(rq->engine->i915) < 8); + GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); /* Compute the page directory offset for the target address range */ offset += (u64)rq->engine->instance << 32; @@ -347,11 +347,11 @@ static int emit_pte(struct i915_request *rq, return total; } -static bool wa_1209644611_applies(int gen, u32 size) +static bool wa_1209644611_applies(int ver, u32 size) { u32 height = size >> PAGE_SHIFT; - if (gen != 11) + if (ver != 11) return false; return height % 4 == 3 && height <= 8; @@ -359,15 +359,15 @@ static bool wa_1209644611_applies(int gen, u32 size) static int emit_copy(struct i915_request *rq, int size) { - const int gen = INTEL_GEN(rq->engine->i915); + const int ver = GRAPHICS_VER(rq->engine->i915); u32 instance = rq->engine->instance; u32 *cs; - cs = intel_ring_begin(rq, gen >= 8 ? 10 : 6); + cs = intel_ring_begin(rq, ver >= 8 ? 
10 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); - if (gen >= 9 && !wa_1209644611_applies(gen, size)) { + if (ver >= 9 && !wa_1209644611_applies(ver, size)) { *cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); *cs++ = BLT_DEPTH_32 | PAGE_SIZE; *cs++ = 0; @@ -378,7 +378,7 @@ static int emit_copy(struct i915_request *rq, int size) *cs++ = PAGE_SIZE; *cs++ = 0; /* src offset */ *cs++ = instance; - } else if (gen >= 8) { + } else if (ver >= 8) { *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; *cs++ = 0; @@ -491,17 +491,17 @@ out_ce: static int emit_clear(struct i915_request *rq, int size, u32 value) { - const int gen = INTEL_GEN(rq->engine->i915); + const int ver = GRAPHICS_VER(rq->engine->i915); u32 instance = rq->engine->instance; u32 *cs; GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - cs = intel_ring_begin(rq, gen >= 8 ? 8 : 6); + cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); - if (gen >= 8) { + if (ver >= 8) { *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; -- cgit v1.2.3 From 88c6317b36c0d90c903b8d04fa296ca109e4e2da Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 8 Jul 2021 12:42:22 +0530 Subject: drm/i915/adl_s: Fix dma_mask_size to 39 bit 46 bit addressing enables you to use 4 bits to support some MKTME features, and 3 more bits for Optane support that uses a subset of MTKME for persistent memory. But GTT addressing sticking to 39 bit addressing, thus setting dma_mask_size to 39 fixes below tests : igt@i915_selftest@live@mman igt@kms_big_fb@linear-32bpp-rotate-0 igt@gem_create@create-clear igt@gem_mmap_offset@clear igt@gem_mmap_gtt@cpuset-big-copy In a way solves Gitlab#3142 https://gitlab.freedesktop.org/drm/intel/-/issues/3142, which had following errors : DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Write] Request device [00:02.0] PASID ffffffff fault addr 7effff9000 [fault reason 05] PTE Write access is not set 0x7effff9000 is suspiciously exactly 39 bits, so it seems likely that the HW just ends up masking off those extra bits hence DMA errors. Changes since V2 : - dim checkpatch error solved Changes since V1 : - Added more details to commit message - Matthew Auld Signed-off-by: Tejas Upadhyay Acked-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210708071222.955455-1-tejaskumarx.surendrakumar.upadhyay@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83b500bb170c..866ba136794c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -936,7 +936,7 @@ static const struct intel_device_info adl_s_info = { .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), - .dma_mask_size = 46, + .dma_mask_size = 39, }; #define XE_LPD_FEATURES \ -- cgit v1.2.3 From 4a832721238c3a552342a282f00fdde25bc3c2a7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 25 Jun 2021 17:45:21 -0700 Subject: drm/i915/huc: Update TGL and friends to HuC 7.9.3 A new HuC is available for TGL and compatible platforms, so switch to using it. 
Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20210626004522.1699509-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 9f23e9de3237..f05b1572e3c3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,9 +48,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 5, 0)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 5, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 5, 0)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \ -- cgit v1.2.3 From 520dfc807d315bc33f0c80a518973791f31dfbd6 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 25 Jun 2021 17:45:22 -0700 Subject: drm/i915/adlp: Add ADL-P GuC/HuC firmware files Add ADL-P to the list of supported GuC and HuC firmware versions. For HuC, it reuses the existing TGL firmware file. For GuC, there is a dedicated firmware release. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20210626004522.1699509-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f05b1572e3c3..3a16d08608a5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,6 +48,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ -- cgit v1.2.3 From fe4751c3d513ff4f5422dbf55a966abafe39255e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:06 -0500 Subject: drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE This reverts commit 88be76cdafc7 ("drm/i915: Allow userspace to specify ringsize on construction"). This API was originally added for OpenCL but the compute-runtime PR has sat open for a year without action so we can still pull it out if we want. I argue we should drop it for three reasons: 1. If the compute-runtime PR has sat open for a year, this clearly isn't that important. 2. It's a very leaky API. Ring size is an implementation detail of the current execlist scheduler and really only makes sense there. It can't apply to the older ring-buffer scheduler on pre-execlist hardware because that's shared across all contexts and it won't apply to the GuC scheduler that's in the pipeline. 3. 
Having userspace set a ring size in bytes is a bad solution to the problem of having too small a ring. There is no way that userspace has the information to know how to properly set the ring size so it's just going to detect the feature and always set it to the maximum of 512K. This is what the compute-runtime PR does. The scheduler in i915, on the other hand, does have the information to make an informed choice. It could detect if the ring size is a problem and grow it itself. Or, if that's too hard, we could just increase the default size from 16K to 32K or even 64K instead of relying on userspace to do it. Let's drop this API for now and, if someone decides they really care about solving this problem, they can do it properly. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-2-jason@jlekstrand.net --- drivers/gpu/drm/i915/Makefile | 1 - drivers/gpu/drm/i915/gem/i915_gem_context.c | 85 +-------------------------- drivers/gpu/drm/i915/gt/intel_context_param.c | 63 -------------------- drivers/gpu/drm/i915/gt/intel_context_param.h | 3 - include/uapi/drm/i915_drm.h | 20 +------ 5 files changed, 4 insertions(+), 168 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/intel_context_param.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 01f28ad5ea57..10b3bb6207ba 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -89,7 +89,6 @@ gt-y += \ gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ - gt/intel_context_param.o \ gt/intel_context_sseu.o \ gt/intel_engine_cs.o \ gt/intel_engine_heartbeat.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7720b8c22c81..ddc3cc3f8f09 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1334,63 +1334,6 @@ out: return err; } -static int __apply_ringsize(struct intel_context *ce, void *sz) -{ - return intel_context_set_ring_size(ce, (unsigned long)sz); -} - -static int set_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE)) - return -EINVAL; - - if (args->value < I915_GTT_PAGE_SIZE) - return -EINVAL; - - if (args->value > 128 * I915_GTT_PAGE_SIZE) - return -EINVAL; - - return context_apply_all(ctx, - __apply_ringsize, - __intel_context_ring_size(args->value)); -} - -static int __get_ringsize(struct intel_context *ce, void *arg) -{ - long sz; - - sz = intel_context_get_ring_size(ce); - GEM_BUG_ON(sz > INT_MAX); - - return sz; /* stop on first engine */ -} - -static int get_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int sz; - - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - sz = context_apply_all(ctx, __get_ringsize, NULL); - if (sz < 0) - return sz; - - args->value = sz; - return 0; -} - int i915_gem_user_to_context_sseu(struct intel_gt *gt, const struct drm_i915_gem_context_param_sseu *user, @@ -2036,11 +1979,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = set_ringsize(ctx, args); - break; - case I915_CONTEXT_PARAM_BAN_PERIOD: + case 
I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; break; @@ -2068,18 +2008,6 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) return ctx_setparam(arg->fpriv, arg->ctx, &local.param); } -static int copy_ring_size(struct intel_context *dst, - struct intel_context *src) -{ - long sz; - - sz = intel_context_get_ring_size(src); - if (sz < 0) - return sz; - - return intel_context_set_ring_size(dst, sz); -} - static int clone_engines(struct i915_gem_context *dst, struct i915_gem_context *src) { @@ -2124,12 +2052,6 @@ static int clone_engines(struct i915_gem_context *dst, } intel_context_set_gem(clone->engines[n], dst); - - /* Copy across the preferred ringsize */ - if (copy_ring_size(clone->engines[n], e->engines[n])) { - __free_engines(clone, n + 1); - goto err_unlock; - } } clone->num_engines = n; i915_sw_fence_complete(&e->fence); @@ -2489,11 +2411,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_persistent(ctx); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = get_ringsize(ctx, args); - break; - case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c deleted file mode 100644 index 65dcd090245d..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_context_param.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_active.h" -#include "intel_context.h" -#include "intel_context_param.h" -#include "intel_ring.h" - -int intel_context_set_ring_size(struct intel_context *ce, long sz) -{ - int err; - - if (intel_context_lock_pinned(ce)) - return -EINTR; - - err = i915_active_wait(&ce->active); - if (err < 0) - goto unlock; - - if (intel_context_is_pinned(ce)) { - err = -EBUSY; /* In active use, come back later! 
*/ - goto unlock; - } - - if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - struct intel_ring *ring; - - /* Replace the existing ringbuffer */ - ring = intel_engine_create_ring(ce->engine, sz); - if (IS_ERR(ring)) { - err = PTR_ERR(ring); - goto unlock; - } - - intel_ring_put(ce->ring); - ce->ring = ring; - - /* Context image will be updated on next pin */ - } else { - ce->ring = __intel_context_ring_size(sz); - } - -unlock: - intel_context_unlock_pinned(ce); - return err; -} - -long intel_context_get_ring_size(struct intel_context *ce) -{ - long sz = (unsigned long)READ_ONCE(ce->ring); - - if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - if (intel_context_lock_pinned(ce)) - return -EINTR; - - sz = ce->ring->size; - intel_context_unlock_pinned(ce); - } - - return sz; -} diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h index 3ecacc675f41..dffedd983693 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.h +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -10,9 +10,6 @@ #include "intel_context.h" -int intel_context_set_ring_size(struct intel_context *ce, long sz); -long intel_context_get_ring_size(struct intel_context *ce); - static inline int intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us) { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2f70c48567c0..f229c0abcbb5 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1722,24 +1722,8 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ -- cgit v1.2.3 From 74e4b90988b25d7bb60cf072b0f1b1afc1af27d5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:07 -0500 Subject: drm/i915: Stop storing the ring size in the ring pointer (v3) Previously, we were storing the ring size in the ring pointer before it was actually allocated. We would then guard setting the ring size on checking for CONTEXT_ALLOC_BIT. This is error-prone at best and really only saves us a few bytes on something that already burns at least 4K. Instead, this patch adds a new ring_size field and makes everything use that. 
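As an illustrative aside (a condensed before/after drawn from the diffs below, not a new change):

	/* before: smuggle the size through the pointer until the ring exists */
	ce->ring = __intel_context_ring_size(SZ_16K);
	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);

	/* after: keep an explicit field; the pointer stays NULL until allocation */
	ce->ring = NULL;
	ce->ring_size = SZ_16K;
	ring = intel_engine_create_ring(engine, ce->ring_size);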
v2 (Daniel Vetter): - Replace 512 * SZ_4K with SZ_2M v2 (Jason Ekstrand): - Rebase on top of page migration code Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-3-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +-- drivers/gpu/drm/i915/gt/intel_context.c | 3 ++- drivers/gpu/drm/i915/gt/intel_context.h | 5 ----- drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_migrate.c | 3 ++- drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- drivers/gpu/drm/i915/gt/selftest_mocs.c | 2 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 7 ++----- 11 files changed, 14 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ddc3cc3f8f09..a4faf06022d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -211,8 +211,7 @@ static void intel_context_set_gem(struct intel_context *ce, GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; if (rcu_access_pointer(ctx->vm)) { struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4033184f13b9..bd63813c8a80 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -371,7 +371,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->engine = engine; ce->ops = engine->cops; ce->sseu = engine->sseu; - ce->ring = __intel_context_ring_size(SZ_4K); + ce->ring = NULL; + ce->ring_size = SZ_4K; ewma_runtime_init(&ce->runtime.avg); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index f83a73a2b39f..b10cbe8fee99 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -175,11 +175,6 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce); -static inline struct intel_ring *__intel_context_ring_size(u64 sz) -{ - return u64_to_ptr(struct intel_ring, sz); -} - static inline bool intel_context_is_barrier(const struct intel_context *ce) { return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ed8c447a7346..90026c177105 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -82,6 +82,7 @@ struct intel_context { spinlock_t signal_lock; /* protects signals, the list of requests */ struct i915_vma *state; + u32 ring_size; struct intel_ring *ring; struct intel_timeline *timeline; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5ca3d1664335..d561573ed98c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -807,7 +807,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); ce->timeline = page_pack_bits(NULL, hwsp); - 
ce->ring = __intel_context_ring_size(ring_size); + ce->ring = NULL; + ce->ring_size = ring_size; i915_vm_put(ce->vm); ce->vm = i915_vm_get(vm); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a27bac0a4bfb..8ada1afe3d22 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -845,7 +845,7 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ERR(vma)) return PTR_ERR(vma); - ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); + ring = intel_engine_create_ring(engine, ce->ring_size); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 14afa1974ea5..d0a7c934fd3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -232,7 +232,8 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m) if (IS_ERR(ce)) return ce; - ce->ring = __intel_context_ring_size(SZ_256K); + ce->ring = NULL; + ce->ring_size = SZ_256K; i915_vm_put(ce->vm); ce->vm = i915_vm_get(m->context->vm); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 08896ae027d5..d790b8b946ed 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2810,7 +2810,7 @@ static int __live_preempt_ring(struct intel_engine_cs *engine, goto err_ce; } - tmp->ring = __intel_context_ring_size(ring_sz); + tmp->ring_size = ring_sz; err = intel_context_pin(tmp); if (err) { diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index b9bb0e6e97f7..8763bbeca0f7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -28,7 +28,7 @@ static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) return ce; /* We build large requests to read the registers from the ring */ - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; return ce; } diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 64da0c91dec1..d0b6a3afcf44 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -874,7 +874,7 @@ static int create_watcher(struct hwsp_watcher *w, if (IS_ERR(ce)) return PTR_ERR(ce); - ce->ring = __intel_context_ring_size(ringsz); + ce->ring_size = ringsz; w->rq = intel_context_create_request(ce); intel_context_put(ce); if (IS_ERR(w->rq)) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 734c37c5e347..b56a8e37a3cd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1409,11 +1409,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) intel_context_set_single_submission(ce); /* Max ring buffer size */ - if (!intel_uc_wants_guc_submission(&engine->gt->uc)) { - const unsigned int ring_size = 512 * SZ_4K; - - ce->ring = __intel_context_ring_size(ring_size); - } + if (!intel_uc_wants_guc_submission(&engine->gt->uc)) + ce->ring_size = SZ_2M; s->shadow[i] = ce; } -- cgit v1.2.3 From 6ff6d61dd2a943bd0c80bb77eb5630e8aa0cac15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:08 -0500 Subject: drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP The idea behind this param is to support OpenCL drivers with relocations because OpenCL reserves 
0x0 for NULL and, if we placed memory there, it would confuse CL kernels. It was originally sent out as part of a patch series including libdrm [1] and Beignet [2] support. However, the libdrm and Beignet patches never landed in their respective upstream projects so this API has never been used. It's never been used in Mesa or any other driver, either. Dropping this API allows us to delete a small bit of code. [1]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067030.html [2]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067031.html Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-4-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 ++-------------- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 1 - drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 -------- include/uapi/drm/i915_drm.h | 4 ++++ 4 files changed, 6 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index a4faf06022d5..5fc0eb4beeea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1920,15 +1920,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, int ret = 0; switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: if (args->size) ret = -EINVAL; @@ -1978,6 +1969,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: default: @@ -2358,11 +2350,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, return -ENOENT; switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; rcu_read_lock(); @@ -2410,6 +2397,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_persistent(ctx); break; + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: default: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 340473aa70de..5ae71ec936f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -129,7 +129,6 @@ struct i915_gem_context { * @user_flags: small set of booleans controlled by the user */ unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3661461bc04e..cb86574bdab4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -290,7 +290,6 @@ struct i915_execbuffer { struct intel_context *reloc_context; u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u64 
batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ @@ -541,9 +540,6 @@ eb_validate_vma(struct i915_execbuffer *eb, entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; } - if (!(entry->flags & EXEC_OBJECT_PINNED)) - entry->flags |= eb->context_flags; - return 0; } @@ -750,10 +746,6 @@ static int eb_select_context(struct i915_execbuffer *eb) if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - eb->context_flags = 0; - if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) - eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; - return 0; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f229c0abcbb5..79dcafaf476e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1638,6 +1638,10 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 -- cgit v1.2.3 From 677db6adc57ddee1f39a14c49ccc1623c39a03f2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:09 -0500 Subject: drm/i915/gem: Set the watchdog timeout directly in intel_context_set_gem (v2) Instead of handling it like a context param, unconditionally set it when intel_contexts are created. For years we've had the idea of a watchdog uAPI floating about. The aim was for media, so that they could set very tight deadlines for their transcodes jobs, so that if you have a corrupt bitstream (especially for decoding) you don't hang your desktop too hard. But it's been stuck in limbo since forever, and this simplifies things a bit in preparation for the proto-context work. If we decide to actually make said uAPI a reality, we can do it through the proto- context easily enough. This does mean that we move from reading the request_timeout_ms param once per engine when engines are created instead of once at context creation. If someone changes request_timeout_ms between creating a context and setting engines, it will mean that they get the new timeout. If someone races setting request_timeout_ms and context creation, they can theoretically end up with different timeouts. However, since both of these are fairly harmless and require changing kernel params, we don't care. 
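For reference, the functional core of this change, condensed from the intel_context_set_gem() hunk further below (same helper and field names as in that hunk), is roughly:

    if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
        ctx->i915->params.request_timeout_ms) {
        unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;

        /*
         * request_timeout_ms is a module parameter expressed in
         * milliseconds, while the per-context watchdog is tracked in
         * microseconds, hence the conversion. The value is sampled
         * here, when the context's engines are created, so a later
         * change to the modparam only affects newly created engines.
         */
        intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
    }
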
v2 (Tvrtko Ursulin): - Add a comment about races with request_timeout_ms Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-5-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 44 ++++------------------- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 4 --- drivers/gpu/drm/i915/gt/intel_context_param.h | 3 +- 3 files changed, 7 insertions(+), 44 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5fc0eb4beeea..9750a1ac7023 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -232,7 +232,12 @@ static void intel_context_set_gem(struct intel_context *ce, intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); - intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); + if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && + ctx->i915->params.request_timeout_ms) { + unsigned int timeout_ms = ctx->i915->params.request_timeout_ms; + + intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); + } } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -791,41 +796,6 @@ static void __assign_timeline(struct i915_gem_context *ctx, context_apply_all(ctx, __apply_timeline, timeline); } -static int __apply_watchdog(struct intel_context *ce, void *timeout_us) -{ - return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); -} - -static int -__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) -{ - int ret; - - ret = context_apply_all(ctx, __apply_watchdog, - (void *)(uintptr_t)timeout_us); - if (!ret) - ctx->watchdog.timeout_us = timeout_us; - - return ret; -} - -static void __set_default_fence_expiry(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int ret; - - if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || - !i915->params.request_timeout_ms) - return; - - /* Default expiry for user fences. */ - ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); - if (ret) - drm_notice(&i915->drm, - "Failed to configure default fence expiry! 
(%d)", - ret); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { @@ -870,8 +840,6 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) intel_timeline_put(timeline); } - __set_default_fence_expiry(ctx); - trace_i915_context_create(ctx); return ctx; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 5ae71ec936f7..676592e27e7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -153,10 +153,6 @@ struct i915_gem_context { */ atomic_t active_count; - struct { - u64 timeout_us; - } watchdog; - /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h index dffedd983693..0c69cb42d075 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.h +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -10,11 +10,10 @@ #include "intel_context.h" -static inline int +static inline void intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us) { ce->watchdog.timeout_us = timeout_us; - return 0; } #endif /* INTEL_CONTEXT_PARAM_H */ -- cgit v1.2.3 From 8cc256a24da14d74c68721f28d7a75c68ebf6552 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:10 -0500 Subject: drm/i915/gem: Return void from context_apply_all None of the callbacks we use with it return an error code anymore; they all return 0 unconditionally. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-6-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9750a1ac7023..3503d46c88cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -718,32 +718,25 @@ __context_engines_await(const struct i915_gem_context *ctx, return engines; } -static int +static void context_apply_all(struct i915_gem_context *ctx, - int (*fn)(struct intel_context *ce, void *data), + void (*fn)(struct intel_context *ce, void *data), void *data) { struct i915_gem_engines_iter it; struct i915_gem_engines *e; struct intel_context *ce; - int err = 0; e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) { - err = fn(ce, data); - if (err) - break; - } + for_each_gem_engine(ce, e, it) + fn(ce, data); i915_sw_fence_complete(&e->fence); - - return err; } -static int __apply_ppgtt(struct intel_context *ce, void *vm) +static void __apply_ppgtt(struct intel_context *ce, void *vm) { i915_vm_put(ce->vm); ce->vm = i915_vm_get(vm); - return 0; } static struct i915_address_space * @@ -783,10 +776,9 @@ static void __set_timeline(struct intel_timeline **dst, intel_timeline_put(old); } -static int __apply_timeline(struct intel_context *ce, void *timeline) +static void __apply_timeline(struct intel_context *ce, void *timeline) { __set_timeline(&ce->timeline, timeline); - return 0; } static void __assign_timeline(struct i915_gem_context *ctx, @@ -1841,19 +1833,17 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static int __apply_priority(struct 
intel_context *ce, void *arg) +static void __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; if (!intel_engine_has_timeslices(ce->engine)) - return 0; + return; if (ctx->sched.priority >= I915_PRIORITY_NORMAL) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); - - return 0; } static int set_priority(struct i915_gem_context *ctx, -- cgit v1.2.3 From 4a766ae40ec8330103a27922b5aa978fdf8bc005 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:11 -0500 Subject: drm/i915: Drop the CONTEXT_CLONE API (v2) This API allows one context to grab bits out of another context upon creation. It can be used as a short-cut for setparam(getparam()) for things like I915_CONTEXT_PARAM_VM. However, it's never been used by any real userspace. It's used by a few IGT tests and that's it. Since it doesn't add any real value (most of the stuff you can CLONE you can copy in other ways), drop it. There is one thing that this API allows you to clone which you cannot clone via getparam/setparam: timelines. However, timelines are an implementation detail of i915 and not really something that needs to be exposed to userspace. Also, sharing timelines between contexts isn't obviously useful and supporting it has the potential to complicate i915 internally. It also doesn't add any functionality that the client can't get in other ways. If a client really wants a shared timeline, they can use a syncobj and set it as an in and out fence on every submit. v2 (Jason Ekstrand): - More detailed commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-7-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +-------------------- .../gpu/drm/i915/gt/intel_execlists_submission.c | 28 --- .../gpu/drm/i915/gt/intel_execlists_submission.h | 3 - include/uapi/drm/i915_drm.h | 16 +- 4 files changed, 6 insertions(+), 240 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 3503d46c88cb..9f9369d3c000 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1957,207 +1957,14 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) return ctx_setparam(arg->fpriv, arg->ctx, &local.param); } -static int clone_engines(struct i915_gem_context *dst, - struct i915_gem_context *src) +static int invalid_ext(struct i915_user_extension __user *ext, void *data) { - struct i915_gem_engines *clone, *e; - bool user_engines; - unsigned long n; - - e = __context_engines_await(src, &user_engines); - if (!e) - return -ENOENT; - - clone = alloc_engines(e->num_engines); - if (!clone) - goto err_unlock; - - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* - * Virtual engines are singletons; they can only exist - * inside a single context, because they embed their - * HW context... As each virtual context implies a single - * timeline (each engine can only dequeue a single request - * at any time), it would be surprising for two contexts - * to use the same engine. So let's create a copy of - * the virtual engine instead. 
- */ - if (intel_engine_is_virtual(engine)) - clone->engines[n] = - intel_execlists_clone_virtual(engine); - else - clone->engines[n] = intel_context_create(engine); - if (IS_ERR_OR_NULL(clone->engines[n])) { - __free_engines(clone, n); - goto err_unlock; - } - - intel_context_set_gem(clone->engines[n], dst); - } - clone->num_engines = n; - i915_sw_fence_complete(&e->fence); - - /* Serialised by constructor */ - engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); - if (user_engines) - i915_gem_context_set_user_engines(dst); - else - i915_gem_context_clear_user_engines(dst); - return 0; - -err_unlock: - i915_sw_fence_complete(&e->fence); - return -ENOMEM; -} - -static int clone_flags(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->user_flags = src->user_flags; - return 0; -} - -static int clone_schedattr(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->sched = src->sched; - return 0; -} - -static int clone_sseu(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - unsigned long n; - int err; - - /* no locking required; sole access under constructor*/ - clone = __context_engines_static(dst); - if (e->num_engines != clone->num_engines) { - err = -EINVAL; - goto unlock; - } - - for (n = 0; n < e->num_engines; n++) { - struct intel_context *ce = e->engines[n]; - - if (clone->engines[n]->engine->class != ce->engine->class) { - /* Must have compatible engine maps! */ - err = -EINVAL; - goto unlock; - } - - /* serialises with set_sseu */ - err = intel_context_lock_pinned(ce); - if (err) - goto unlock; - - clone->engines[n]->sseu = ce->sseu; - intel_context_unlock_pinned(ce); - } - - err = 0; -unlock: - i915_gem_context_unlock_engines(src); - return err; -} - -static int clone_timeline(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - if (src->timeline) - __assign_timeline(dst, src->timeline); - - return 0; -} - -static int clone_vm(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_address_space *vm; - int err = 0; - - if (!rcu_access_pointer(src->vm)) - return 0; - - rcu_read_lock(); - vm = context_get_vm_rcu(src); - rcu_read_unlock(); - - if (!mutex_lock_interruptible(&dst->mutex)) { - __assign_ppgtt(dst, vm); - mutex_unlock(&dst->mutex); - } else { - err = -EINTR; - } - - i915_vm_put(vm); - return err; -} - -static int create_clone(struct i915_user_extension __user *ext, void *data) -{ - static int (* const fn[])(struct i915_gem_context *dst, - struct i915_gem_context *src) = { -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y - MAP(ENGINES, clone_engines), - MAP(FLAGS, clone_flags), - MAP(SCHEDATTR, clone_schedattr), - MAP(SSEU, clone_sseu), - MAP(TIMELINE, clone_timeline), - MAP(VM, clone_vm), -#undef MAP - }; - struct drm_i915_gem_context_create_ext_clone local; - const struct create_ext *arg = data; - struct i915_gem_context *dst = arg->ctx; - struct i915_gem_context *src; - int err, bit; - - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; - - BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != - I915_CONTEXT_CLONE_UNKNOWN); - - if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) - return -EINVAL; - - if (local.rsvd) - return -EINVAL; - - rcu_read_lock(); - src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); - rcu_read_unlock(); - if (!src) - return -ENOENT; - - GEM_BUG_ON(src == dst); - - for (bit = 0; bit < 
ARRAY_SIZE(fn); bit++) { - if (!(local.flags & BIT(bit))) - continue; - - err = fn[bit](dst, src); - if (err) - return err; - } - - return 0; + return -EINVAL; } static const i915_user_extension_fn create_extensions[] = { [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, - [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, + [I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext, }; static bool client_is_banned(struct drm_i915_file_private *file_priv) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index cdb2126a159a..7dd7afccb3ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3850,34 +3850,6 @@ err_put: return ERR_PTR(err); } -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src) -{ - struct virtual_engine *se = to_virtual_engine(src); - struct intel_context *dst; - - dst = intel_execlists_create_virtual(se->siblings, - se->num_siblings); - if (IS_ERR(dst)) - return dst; - - if (se->num_bonds) { - struct virtual_engine *de = to_virtual_engine(dst->engine); - - de->bonds = kmemdup(se->bonds, - sizeof(*se->bonds) * se->num_bonds, - GFP_KERNEL); - if (!de->bonds) { - intel_context_put(dst); - return ERR_PTR(-ENOMEM); - } - - de->num_bonds = se->num_bonds; - } - - return dst; -} - int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index 4ca9b475e252..c0b23f69535e 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -36,9 +36,6 @@ struct intel_context * intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src); - int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 79dcafaf476e..e334a8b14ef2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2006,20 +2006,10 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; -struct drm_i915_gem_context_create_ext_clone { +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this extension number. + */ #define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; -}; struct drm_i915_gem_context_destroy { __u32 ctx_id; -- cgit v1.2.3 From 00dae4d3d35d4f526929633b76e00b0ab4d3970d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:12 -0500 Subject: drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4) This API is entirely unnecessary and I'd love to get rid of it. 
If userspace wants a single timeline across multiple contexts, they can either use implicit synchronization or a syncobj, both of which existed at the time this feature landed. The justification given at the time was that it would help GL drivers which are inherently single-timeline. However, neither of our GL drivers actually wanted the feature. i965 was already in maintenance mode at the time and iris uses syncobj for everything. Unfortunately, as much as I'd love to get rid of it, it is used by the media driver so we can't do that. We can, however, do the next-best thing which is to embed a syncobj in the context and do exactly what we'd expect from userspace internally. This isn't an entirely identical implementation because it's no longer atomic if userspace races with itself by calling execbuffer2 twice simultaneously from different threads. It won't crash in that case; it just doesn't guarantee any ordering between those two submits. It also means that sync files exported from different engines on a SINGLE_TIMELINE context will have different fence contexts. This is visible to userspace if it looks at the obj_name field of sync_fence_info. Moving SINGLE_TIMELINE to a syncobj emulation has a couple of technical advantages beyond mere annoyance. One is that intel_timeline is no longer an api-visible object and can remain entirely an implementation detail. This may be advantageous as we make scheduler changes going forward. Second is that, together with deleting the CLONE_CONTEXT API, we should now have a 1:1 mapping between intel_context and intel_timeline which may help us reduce locking. v2 (Tvrtko Ursulin): - Update the comment on i915_gem_context::syncobj to mention that it's an emulation and the possible race if userspace calls execbuffer2 twice on the same context concurrently. 
v2 (Jason Ekstrand): - Wrap the checks for eb.gem_context->syncobj in unlikely() - Drop the dma_fence reference - Improved commit message v3 (Jason Ekstrand): - Move the dma_fence_put() to before the error exit v4 (Tvrtko Ursulin): - Add a comment about fence contexts to the commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-8-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 49 +++++------------------ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 14 ++++++- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 16 ++++++++ 3 files changed, 40 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9f9369d3c000..249bd36f1401 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,6 +67,8 @@ #include #include +#include + #include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" #include "gt/intel_context_param.h" @@ -224,10 +226,6 @@ static void intel_context_set_gem(struct intel_context *ce, ce->vm = vm; } - GEM_BUG_ON(ce->timeline); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); @@ -351,9 +349,6 @@ void i915_gem_context_release(struct kref *ref) mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); - if (ctx->timeline) - intel_timeline_put(ctx->timeline); - put_pid(ctx->pid); mutex_destroy(&ctx->mutex); @@ -570,6 +565,9 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -765,33 +763,11 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, i915_vm_close(vm); } -static void __set_timeline(struct intel_timeline **dst, - struct intel_timeline *src) -{ - struct intel_timeline *old = *dst; - - *dst = src ? 
intel_timeline_get(src) : NULL; - - if (old) - intel_timeline_put(old); -} - -static void __apply_timeline(struct intel_context *ce, void *timeline) -{ - __set_timeline(&ce->timeline, timeline); -} - -static void __assign_timeline(struct i915_gem_context *ctx, - struct intel_timeline *timeline) -{ - __set_timeline(&ctx->timeline, timeline); - context_apply_all(ctx, __apply_timeline, timeline); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; + int ret; if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && !HAS_EXECLISTS(i915)) @@ -820,16 +796,13 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct intel_timeline *timeline; - - timeline = intel_timeline_create(&i915->gt); - if (IS_ERR(timeline)) { + ret = drm_syncobj_create(&ctx->syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) { context_close(ctx); - return ERR_CAST(timeline); + return ERR_PTR(ret); } - - __assign_timeline(ctx, timeline); - intel_timeline_put(timeline); } trace_i915_context_create(ctx); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 676592e27e7d..df76767f0c41 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -83,7 +83,19 @@ struct i915_gem_context { struct i915_gem_engines __rcu *engines; struct mutex engines_mutex; /* guards writes to engines */ - struct intel_timeline *timeline; + /** + * @syncobj: Shared timeline syncobj + * + * When the SHARED_TIMELINE flag is set on context creation, we + * emulate a single timeline across all engines using this syncobj. + * For every execbuffer2 call, this syncobj is used as both an in- + * and out-fence. Unlike the real intel_timeline, this doesn't + * provide perfect atomic in-order guarantees if the client races + * with itself by calling execbuffer2 twice concurrently. However, + * if userspace races with itself, that's not likely to yield well- + * defined results anyway so we choose to not care. 
+ */ + struct drm_syncobj *syncobj; /** * @vm: unique address space (GTT) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cb86574bdab4..380374f683b2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3470,6 +3470,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (unlikely(eb.gem_context->syncobj)) { + struct dma_fence *fence; + + fence = drm_syncobj_fence_get(eb.gem_context->syncobj); + err = i915_request_await_dma_fence(eb.request, fence); + dma_fence_put(fence); + if (err) + goto err_ext; + } + if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, @@ -3527,6 +3537,12 @@ err_request: fput(out_fence->file); } } + + if (unlikely(eb.gem_context->syncobj)) { + drm_syncobj_replace_fence(eb.gem_context->syncobj, + &eb.request->fence); + } + i915_request_put(eb.request); err_vma: -- cgit v1.2.3 From c7a71fc8ee04669037501da2f987f0732e8754ee Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:13 -0500 Subject: drm/i915: Drop getparam support for I915_CONTEXT_PARAM_ENGINES This has never been used by any userspace except IGT and provides no real functionality beyond parroting back parameters userspace passed in as part of context creation or via setparam. If the context is in legacy mode (where you use I915_EXEC_RENDER and friends), it returns success with zero data so it's not useful for discovering what engines are in the context. It's also not a replacement for the recently removed I915_CONTEXT_CLONE_ENGINES because it doesn't return any of the balancing or bonding information. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-9-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 77 +---------------------------- 1 file changed, 1 insertion(+), 76 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 249bd36f1401..e36e3b1ae14e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1724,78 +1724,6 @@ replace: return 0; } -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - bool user_engines; - int err = 0; - - e = __context_engines_await(ctx, &user_engines); - if (!e) - return -ENOENT; - - if (!user_engines) { - i915_sw_fence_complete(&e->fence); - args->size = 0; - return 0; - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = e->engines[n]->engine->uabi_class; - 
ci.engine_instance = e->engines[n]->engine->uabi_instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - i915_sw_fence_complete(&e->fence); - return err; -} - static int set_persistence(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) @@ -2126,10 +2054,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: args->size = 0; args->value = i915_gem_context_is_persistent(ctx); @@ -2137,6 +2061,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_ENGINES: case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; -- cgit v1.2.3 From 521695c6f1a308058967612f4b94435445bccb34 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:14 -0500 Subject: drm/i915/gem: Disallow bonding of virtual engines (v3) This adds a bunch of complexity which the media driver has never actually used. The media driver does technically bond a balanced engine to another engine but the balanced engine only has one engine in the sibling set. This doesn't actually result in a virtual engine. This functionality was originally added to handle cases where we may have more than two video engines and media might want to load-balance their bonded submits by, for instance, submitting to a balanced vcs0-1 as the primary and then vcs2-3 as the secondary. However, no such hardware has shipped thus far and, if we ever want to enable such use-cases in the future, we'll use the up-and-coming parallel submit API which targets GuC submission. This makes I915_CONTEXT_ENGINES_EXT_BOND a total no-op. We leave the validation code in place in case we ever decide we want to do something interesting with the bonding information. v2 (Jason Ekstrand): - Don't delete quite as much code. v3 (Tvrtko Ursulin): - Add some history to the commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-10-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 18 +- .../gpu/drm/i915/gt/intel_execlists_submission.c | 69 ------- .../gpu/drm/i915/gt/intel_execlists_submission.h | 5 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 229 --------------------- 4 files changed, 8 insertions(+), 313 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e36e3b1ae14e..5eca91ded342 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1552,6 +1552,12 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) } virtual = set->engines->engines[idx]->engine; + if (intel_engine_is_virtual(virtual)) { + drm_dbg(&i915->drm, + "Bonding with virtual engines not allowed\n"); + return -EINVAL; + } + err = check_user_mbz(&ext->flags); if (err) return err; @@ -1592,18 +1598,6 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) n, ci.engine_class, ci.engine_instance); return -EINVAL; } - - /* - * A non-virtual engine has no siblings to choose between; and - * a submit fence will always be directed to the one engine. 
- */ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 7dd7afccb3ad..98b256352c23 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -182,18 +182,6 @@ struct virtual_engine { int prio; } nodes[I915_NUM_ENGINES]; - /* - * Keep track of bonded pairs -- restrictions upon on our selection - * of physical engines any particular request may be submitted to. - * If we receive a submit-fence from a master engine, we will only - * use one of sibling_mask physical engines. - */ - struct ve_bond { - const struct intel_engine_cs *master; - intel_engine_mask_t sibling_mask; - } *bonds; - unsigned int num_bonds; - /* And finally, which physical engines this virtual engine maps onto. */ unsigned int num_siblings; struct intel_engine_cs *siblings[]; @@ -3413,7 +3401,6 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); - kfree(ve->bonds); kfree(ve); } @@ -3668,33 +3655,13 @@ unlock: spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -static struct ve_bond * -virtual_find_bond(struct virtual_engine *ve, - const struct intel_engine_cs *master) -{ - int i; - - for (i = 0; i < ve->num_bonds; i++) { - if (ve->bonds[i].master == master) - return &ve->bonds[i]; - } - - return NULL; -} - static void virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) { - struct virtual_engine *ve = to_virtual_engine(rq->engine); intel_engine_mask_t allowed, exec; - struct ve_bond *bond; allowed = ~to_request(signal)->engine->mask; - bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) - allowed &= bond->sibling_mask; - /* Restrict the bonded request to run on only the available engines */ exec = READ_ONCE(rq->execution_mask); while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) @@ -3850,42 +3817,6 @@ err_put: return ERR_PTR(err); } -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling) -{ - struct virtual_engine *ve = to_virtual_engine(engine); - struct ve_bond *bond; - int n; - - /* Sanity check the sibling is part of the virtual engine */ - for (n = 0; n < ve->num_siblings; n++) - if (sibling == ve->siblings[n]) - break; - if (n == ve->num_siblings) - return -EINVAL; - - bond = virtual_find_bond(ve, master); - if (bond) { - bond->sibling_mask |= sibling->mask; - return 0; - } - - bond = krealloc(ve->bonds, - sizeof(*bond) * (ve->num_bonds + 1), - GFP_KERNEL); - if (!bond) - return -ENOMEM; - - bond[ve->num_bonds].master = master; - bond[ve->num_bonds].sibling_mask = sibling->mask; - - ve->bonds = bond; - ve->num_bonds++; - - return 0; -} - void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index c0b23f69535e..ad4f3e1a0fde 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -36,8 +36,7 @@ struct intel_context * intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); 
-int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling); +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index d790b8b946ed..6abce18d0ef3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -4328,234 +4328,6 @@ static int live_virtual_preserved(void *arg) return 0; } -static int bond_virtual_engine(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings, - unsigned int nsibling, - unsigned int flags) -#define BOND_SCHEDULE BIT(0) -{ - struct intel_engine_cs *master; - struct i915_request *rq[16]; - enum intel_engine_id id; - struct igt_spinner spin; - unsigned long n; - int err; - - /* - * A set of bonded requests is intended to be run concurrently - * across a number of engines. We use one request per-engine - * and a magic fence to schedule each of the bonded requests - * at the same time. A consequence of our current scheduler is that - * we only move requests to the HW ready queue when the request - * becomes ready, that is when all of its prerequisite fences have - * been signaled. As one of those fences is the master submit fence, - * there is a delay on all secondary fences as the HW may be - * currently busy. Equally, as all the requests are independent, - * they may have other fences that delay individual request - * submission to HW. Ergo, we do not guarantee that all requests are - * immediately submitted to HW at the same time, just that if the - * rules are abided by, they are ready at the same time as the - * first is submitted. Userspace can embed semaphores in its batch - * to ensure parallel execution of its phases as it requires. - * Though naturally it gets requested that perhaps the scheduler should - * take care of parallel execution, even across preemption events on - * different HW. (The proper answer is of course "lalalala".) - * - * With the submit-fence, we have identified three possible phases - * of synchronisation depending on the master fence: queued (not - * ready), executing, and signaled. The first two are quite simple - * and checked below. However, the signaled master fence handling is - * contentious. Currently we do not distinguish between a signaled - * fence and an expired fence, as once signaled it does not convey - * any information about the previous execution. It may even be freed - * and hence checking later it may not exist at all. Ergo we currently - * do not apply the bonding constraint for an already signaled fence, - * as our expectation is that it should not constrain the secondaries - * and is outside of the scope of the bonded request API (i.e. all - * userspace requests are meant to be running in parallel). As - * it imposes no constraint, and is effectively a no-op, we do not - * check below as normal execution flows are checked extensively above. - * - * XXX Is the degenerate handling of signaled submit fences the - * expected behaviour for userpace? 
- */ - - GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - err = 0; - rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, gt, id) { - struct i915_sw_fence fence = {}; - struct intel_context *ce; - - if (master->class == class) - continue; - - ce = intel_context_create(master); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - - rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); - intel_context_put(ce); - if (IS_ERR(rq[0])) { - err = PTR_ERR(rq[0]); - goto out; - } - i915_request_get(rq[0]); - - if (flags & BOND_SCHEDULE) { - onstack_fence_init(&fence); - err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, - &fence, - GFP_KERNEL); - } - - i915_request_add(rq[0]); - if (err < 0) - goto out; - - if (!(flags & BOND_SCHEDULE) && - !igt_wait_for_spinner(&spin, rq[0])) { - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - struct intel_context *ve; - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_virtual_engine_attach_bond(ve->engine, - master, - siblings[n]); - if (err) { - intel_context_put(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_context_pin(ve); - intel_context_put(ve); - if (err) { - onstack_fence_fini(&fence); - goto out; - } - - rq[n + 1] = i915_request_create(ve); - intel_context_unpin(ve); - if (IS_ERR(rq[n + 1])) { - err = PTR_ERR(rq[n + 1]); - onstack_fence_fini(&fence); - goto out; - } - i915_request_get(rq[n + 1]); - - err = i915_request_await_execution(rq[n + 1], - &rq[0]->fence, - ve->engine->bond_execute); - i915_request_add(rq[n + 1]); - if (err < 0) { - onstack_fence_fini(&fence); - goto out; - } - } - onstack_fence_fini(&fence); - intel_engine_flush_submission(master); - igt_spinner_end(&spin); - - if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { - pr_err("Master request did not execute (on %s)!\n", - rq[0]->engine->name); - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - if (i915_request_wait(rq[n + 1], 0, - MAX_SCHEDULE_TIMEOUT) < 0) { - err = -EIO; - goto out; - } - - if (rq[n + 1]->engine != siblings[n]) { - pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", - siblings[n]->name, - rq[n + 1]->engine->name, - rq[0]->engine->name); - err = -EINVAL; - goto out; - } - } - - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - rq[0] = ERR_PTR(-ENOMEM); - } - -out: - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - if (igt_flush_test(gt->i915)) - err = -EIO; - - igt_spinner_fini(&spin); - return err; -} - -static int live_virtual_bond(void *arg) -{ - static const struct phase { - const char *name; - unsigned int flags; - } phases[] = { - { "", 0 }, - { "schedule", BOND_SCHEDULE }, - { }, - }; - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - const struct phase *p; - int nsibling; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - for (p = phases; p->name; p++) { - err = bond_virtual_engine(gt, - class, siblings, nsibling, - p->flags); - if (err) { - pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", - __func__, p->name, class, nsibling, err); - return err; - } - } - } - - return 0; -} 
- static int reset_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) @@ -4721,7 +4493,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_mask), SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_slice), - SUBTEST(live_virtual_bond), SUBTEST(live_virtual_reset), }; -- cgit v1.2.3 From dd4f1bbae8f9b4faa38b697e3ff248eb0f4404f6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:15 -0500 Subject: drm/i915/gem: Remove engine auto-magic with FENCE_SUBMIT (v2) Even though FENCE_SUBMIT is only documented to wait until the request in the in-fence starts instead of waiting until it completes, it has a bit more magic than that. If FENCE_SUBMIT is used to submit something to a balanced engine, we would wait to assign engines until the primary request was ready to start and then attempt to assign it to a different engine than the primary. There is an IGT test (the bonded-slice subtest of gem_exec_balancer) which exercises this by submitting a primary batch to a specific VCS and then using FENCE_SUBMIT to submit a secondary which can run on any VCS and have i915 figure out which VCS to run it on such that they can run in parallel. However, this functionality has never been used in the real world. The media driver (the only user of FENCE_SUBMIT) always picks exactly two physical engines to bond and never asks us to pick which to use. v2 (Daniel Vetter): - Mention the exact IGT test this breaks Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-11-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ------- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 17 ----------------- 3 files changed, 1 insertion(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 380374f683b2..2671d7a911ff 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3484,7 +3484,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, in_fence, - eb.engine->bond_execute); + NULL); else err = i915_request_await_dma_fence(eb.request, in_fence); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 5b91068ab277..1cb9c3b70b29 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -416,13 +416,6 @@ struct intel_engine_cs { */ void (*submit_request)(struct i915_request *rq); - /* - * Called on signaling of a SUBMIT_FENCE, passing along the signaling - * request down to the bonded pairs. 
- */ - void (*bond_execute)(struct i915_request *rq, - struct dma_fence *signal); - void (*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 98b256352c23..56e25090da67 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3655,22 +3655,6 @@ unlock: spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -static void -virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) -{ - intel_engine_mask_t allowed, exec; - - allowed = ~to_request(signal)->engine->mask; - - /* Restrict the bonded request to run on only the available engines */ - exec = READ_ONCE(rq->execution_mask); - while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) - ; - - /* Prevent the master from being re-run on the bonded engines */ - to_request(signal)->execution_mask &= ~allowed; -} - struct intel_context * intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) @@ -3731,7 +3715,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.sched_engine->schedule = i915_schedule; ve->base.sched_engine->kick_backend = kick_execlists; ve->base.submit_request = virtual_submit_request; - ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet); -- cgit v1.2.3 From 5ac545b8b0145cfa8123f8e9ddc066da49eec261 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:16 -0500 Subject: drm/i915/request: Remove the hook from await_execution This was only ever used for FENCE_SUBMIT automatic engine selection which was removed in the previous commit. 
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-12-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/i915_request.c | 42 +++++--------------------- drivers/gpu/drm/i915/i915_request.h | 4 +-- 3 files changed, 9 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 2671d7a911ff..70e19a22a51a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3483,8 +3483,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, - in_fence, - NULL); + in_fence); else err = i915_request_await_dma_fence(eb.request, in_fence); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c5989c0b83d3..86b4c9f2613d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,6 @@ struct execute_cb { struct irq_work work; struct i915_sw_fence *fence; - void (*hook)(struct i915_request *rq, struct dma_fence *signal); struct i915_request *signal; }; @@ -180,17 +179,6 @@ static void irq_execute_cb(struct irq_work *wrk) kmem_cache_free(global.slab_execute_cbs, cb); } -static void irq_execute_cb_hook(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - cb->hook(container_of(cb->fence, struct i915_request, submit), - &cb->signal->fence); - i915_request_put(cb->signal); - - irq_execute_cb(wrk); -} - static __always_inline void __notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) { @@ -517,17 +505,12 @@ static bool __request_in_flight(const struct i915_request *signal) static int __await_execution(struct i915_request *rq, struct i915_request *signal, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal), gfp_t gfp) { struct execute_cb *cb; - if (i915_request_is_active(signal)) { - if (hook) - hook(rq, &signal->fence); + if (i915_request_is_active(signal)) return 0; - } cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); if (!cb) @@ -537,12 +520,6 @@ __await_execution(struct i915_request *rq, i915_sw_fence_await(cb->fence); init_irq_work(&cb->work, irq_execute_cb); - if (hook) { - cb->hook = hook; - cb->signal = i915_request_get(signal); - cb->work.func = irq_execute_cb_hook; - } - /* * Register the callback first, then see if the signaler is already * active. This ensures that if we race with the @@ -1253,7 +1230,7 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; /* Only submit our spinner after the signaler is running! 
*/ - if (__await_execution(to, from, NULL, gfp)) + if (__await_execution(to, from, gfp)) goto await_fence; if (__emit_semaphore_wait(to, from, from->fence.seqno)) @@ -1284,16 +1261,14 @@ static int intel_timeline_sync_set_start(struct intel_timeline *tl, static int __i915_request_await_execution(struct i915_request *to, - struct i915_request *from, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct i915_request *from) { int err; GEM_BUG_ON(intel_context_is_barrier(from->context)); /* Submit both requests at the same time */ - err = __await_execution(to, from, hook, I915_FENCE_GFP); + err = __await_execution(to, from, I915_FENCE_GFP); if (err) return err; @@ -1406,9 +1381,7 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) int i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; @@ -1441,8 +1414,7 @@ i915_request_await_execution(struct i915_request *rq, if (dma_fence_is_i915(fence)) ret = __i915_request_await_execution(rq, - to_request(fence), - hook); + to_request(fence)); else ret = i915_request_await_external(rq, fence); if (ret < 0) @@ -1468,7 +1440,7 @@ await_request_submit(struct i915_request *to, struct i915_request *from) &from->submit, I915_FENCE_GFP); else - return __i915_request_await_execution(to, from, NULL); + return __i915_request_await_execution(to, from); } static int diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 239964bec1fa..5deb65ec5fa5 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -352,9 +352,7 @@ int i915_request_await_object(struct i915_request *to, int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); int i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)); + struct dma_fence *fence); void i915_request_add(struct i915_request *rq); -- cgit v1.2.3 From ebb1ca741bee54e833ecabbd95090c92210c65d2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:17 -0500 Subject: drm/i915/gem: Disallow creating contexts with too many engines There's no sense in allowing userspace to create more engines than it can possibly access via execbuf. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-13-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5eca91ded342..0ba8506fb966 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1639,11 +1639,11 @@ set_engines(struct i915_gem_context *ctx, return -EINVAL; } - /* - * Note that I915_EXEC_RING_MASK limits execbuf to only using the - * first 64 engines defined here. 
- */ num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + /* RING_MASK has no shift so we can use it directly here */ + if (num_engines > I915_EXEC_RING_MASK + 1) + return -EINVAL; + set.engines = alloc_engines(num_engines); if (!set.engines) return -ENOMEM; -- cgit v1.2.3 From a4839cb1137b0df1303072473e8fd1b32daaeb13 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:18 -0500 Subject: drm/i915: Stop manually RCU banging in reset_stats_ioctl (v2) As far as I can tell, the only real reason for this is to avoid taking a reference to the i915_gem_context. The cost of those two atomics probably pales in comparison to the cost of the ioctl itself so we're really not buying ourselves anything here. We're about to make context lookup a tiny bit more complicated, so let's get rid of the one hand- rolled case. Some usermode drivers such as our Vulkan driver call GET_RESET_STATS on every execbuf so the perf here could theoretically be an issue. If this ever does become a performance issue for any such userspace drivers, they can use set CONTEXT_PARAM_RECOVERABLE to false and look for -EIO coming from execbuf to check for hangs instead. v2 (Daniel Vetter): - Add a comment in the commit message about recoverable contexts Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-14-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 ++++--------- drivers/gpu/drm/i915/i915_drv.h | 8 +------- 2 files changed, 5 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0ba8506fb966..61fe6d18d406 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2090,16 +2090,13 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; - int ret; if (args->flags || args->pad) return -EINVAL; - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); + ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); if (!ctx) - goto out; + return -ENOENT; /* * We opt for unserialised reads here. 
This may result in tearing @@ -2116,10 +2113,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, args->batch_active = atomic_read(&ctx->guilty_count); args->batch_pending = atomic_read(&ctx->active_count); - ret = 0; -out: - rcu_read_unlock(); - return ret; + i915_gem_context_put(ctx); + return 0; } /* GEM context-engines iterator: for_each_gem_engine() */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bfbfbae57573..91a15100602d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1847,19 +1847,13 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags); -static inline struct i915_gem_context * -__i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id) -{ - return xa_load(&file_priv->context_xa, id); -} - static inline struct i915_gem_context * i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_gem_context *ctx; rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file_priv, id); + ctx = xa_load(&file_priv->context_xa, id); if (ctx && !kref_get_unless_zero(&ctx->ref)) ctx = NULL; rcu_read_unlock(); -- cgit v1.2.3 From aaa5957c97592baa62ae5ce8079b18f94b1241f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:19 -0500 Subject: drm/i915/gem: Add a separate validate_priority helper With the proto-context stuff added later in this series, we end up having to duplicate set_priority. This lets us avoid duplicating the validation logic. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-15-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 42 ++++++++++++++++++----------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 61fe6d18d406..f9a6eac78c0a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -169,6 +169,28 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } +static int validate_priority(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + return 0; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -1744,23 +1766,13 @@ static void __apply_priority(struct intel_context *ce, void *arg) static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { - s64 priority = args->value; - - if (args->size) - return -EINVAL; - - if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - return -ENODEV; - - if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - return -EINVAL; + int err; - if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - return -EPERM; + err = validate_priority(ctx->i915, args); + if (err) + return err; - 
ctx->sched.priority = priority; + ctx->sched.priority = args->value; context_apply_all(ctx, __apply_priority, ctx); return 0; -- cgit v1.2.3 From f8a9a5c2e9058bcfc3a3d5b444d10fd8f20cb29e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:20 -0500 Subject: drm/i915: Add gem/i915_gem_context.h to the docs In order to prevent kernel doc warnings, also fill out docs for any missing fields and fix those that forgot the "@". Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-16-jason@jlekstrand.net --- Documentation/gpu/i915.rst | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 43 +++++++++++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e6fd9608e9c6..204ebdaadb45 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -422,6 +422,8 @@ Batchbuffer Parsing User Batchbuffer Execution -------------------------- +.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_context_types.h + .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c :doc: User command execution diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index df76767f0c41..5f0673a2129f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -30,19 +30,39 @@ struct i915_address_space; struct intel_timeline; struct intel_ring; +/** + * struct i915_gem_engines - A set of engines + */ struct i915_gem_engines { union { + /** @link: Link in i915_gem_context::stale::engines */ struct list_head link; + + /** @rcu: RCU to use when freeing */ struct rcu_head rcu; }; + + /** @fence: Fence used for delayed destruction of engines */ struct i915_sw_fence fence; + + /** @ctx: i915_gem_context backpointer */ struct i915_gem_context *ctx; + + /** @num_engines: Number of engines in this set */ unsigned int num_engines; + + /** @engines: Array of engines */ struct intel_context *engines[]; }; +/** + * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set + */ struct i915_gem_engines_iter { + /** @idx: Index into i915_gem_engines::engines */ unsigned int idx; + + /** @engines: Engine set being iterated */ const struct i915_gem_engines *engines; }; @@ -53,10 +73,10 @@ struct i915_gem_engines_iter { * logical hardware state for a particular client. */ struct i915_gem_context { - /** i915: i915 device backpointer */ + /** @i915: i915 device backpointer */ struct drm_i915_private *i915; - /** file_priv: owning file descriptor */ + /** @file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; /** @@ -81,7 +101,9 @@ struct i915_gem_context { * CONTEXT_USER_ENGINES flag is set). 
*/ struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ + + /** @engines_mutex: guards writes to engines */ + struct mutex engines_mutex; /** * @syncobj: Shared timeline syncobj @@ -118,7 +140,7 @@ struct i915_gem_context { */ struct pid *pid; - /** link: place with &drm_i915_private.context_list */ + /** @link: place with &drm_i915_private.context_list */ struct list_head link; /** @@ -153,11 +175,13 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; + /** @sched: scheduler parameters */ struct i915_sched_attr sched; - /** guilty_count: How many times this context has caused a GPU hang. */ + /** @guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** * @active_count: How many times this context was active during a GPU @@ -171,15 +195,17 @@ struct i915_gem_context { unsigned long hang_timestamp[2]; #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - /** remap_slice: Bitmask of cache lines that need remapping */ + /** @remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; /** - * handles_vma: rbtree to look up our context specific obj/vma for + * @handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** @lut_mutex: Locks handles_vma */ struct mutex lut_mutex; /** @@ -191,8 +217,11 @@ struct i915_gem_context { */ char name[TASK_COMM_LEN + 8]; + /** @stale: tracks stale engines to be destroyed */ struct { + /** @lock: guards engines */ spinlock_t lock; + /** @engines: list of stale engines */ struct list_head engines; } stale; }; -- cgit v1.2.3 From a34857dc92475a926cbfbebcba8fb1f1b1c6056b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:21 -0500 Subject: drm/i915/gem: Add an intermediate proto_context struct (v5) The current context uAPI allows for two methods of setting context parameters: SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called at any time while the later happens as part of GEM_CONTEXT_CREATE. Currently, everything settable via one is settable via the other. While some params are fairly simple and setting them on a live context is harmless such the context priority, others are far trickier such as the VM or the set of engines. In order to swap out the VM, for instance, we have to delay until all current in-flight work is complete, swap in the new VM, and then continue. This leads to a plethora of potential race conditions we'd really rather avoid. Unfortunately, both methods of setting the VM and the engine set are in active use today so we can't simply disallow setting the VM or engine set vial SET_CONTEXT_PARAM. In order to work around this wart, this commit adds a proto-context struct which contains all the context create parameters. 
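The shape of the resulting flow, condensed from the ioctl changes in the diff below (a sketch, not the full error handling): every create parameter is gathered into the proto-context first, and only when the real context is built do those parameters become immutable.

    struct i915_gem_proto_context *pc;
    struct i915_gem_context *ctx;

    pc = proto_context_create(i915, args->flags);
    if (IS_ERR(pc))
            return PTR_ERR(pc);

    /* later patches in the series route CONTEXT_CREATE_EXT_SETPARAM
     * extensions through the proto-context as well */

    ctx = i915_gem_create_context(i915, pc);
    proto_context_close(pc);
    if (IS_ERR(ctx))
            return PTR_ERR(ctx);
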
v2 (Daniel Vetter): - Better commit message - Use __set/clear_bit instead of set/clear_bit because there's no race and we don't need the atomics v3 (Daniel Vetter): - Use manual bitops and BIT() instead of __set_bit v4 (Daniel Vetter): - Add a changelog to the commit message - Better hyperlinking in docs - Create the default PPGTT in i915_gem_create_context v5 (Daniel Vetter): - Hand-roll the initialization of UCONTEXT_PERSISTENCE Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-17-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 84 +++++++++++++++++++---- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 22 ++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 16 ++++- 3 files changed, 105 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index f9a6eac78c0a..741624da8db7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -191,6 +191,43 @@ static int validate_priority(struct drm_i915_private *i915, return 0; } +static void proto_context_close(struct i915_gem_proto_context *pc) +{ + if (pc->vm) + i915_vm_put(pc->vm); + kfree(pc); +} + +static struct i915_gem_proto_context * +proto_context_create(struct drm_i915_private *i915, unsigned int flags) +{ + struct i915_gem_proto_context *pc, *err; + + pc = kzalloc(sizeof(*pc), GFP_KERNEL); + if (!pc) + return ERR_PTR(-ENOMEM); + + pc->user_flags = BIT(UCONTEXT_BANNABLE) | + BIT(UCONTEXT_RECOVERABLE); + if (i915->params.enable_hangcheck) + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + pc->sched.priority = I915_PRIORITY_NORMAL; + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + if (!HAS_EXECLISTS(i915)) { + err = ERR_PTR(-EINVAL); + goto proto_close; + } + pc->single_timeline = true; + } + + return pc; + +proto_close: + proto_context_close(pc); + return err; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -660,7 +697,8 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) } static struct i915_gem_context * -__create_context(struct drm_i915_private *i915) +__create_context(struct drm_i915_private *i915, + const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; struct i915_gem_engines *e; @@ -673,7 +711,7 @@ __create_context(struct drm_i915_private *i915) kref_init(&ctx->ref); ctx->i915 = i915; - ctx->sched.priority = I915_PRIORITY_NORMAL; + ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); @@ -696,9 +734,7 @@ __create_context(struct drm_i915_private *i915) * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - __context_set_persistence(ctx, true /* cgroup hook? 
*/); + ctx->user_flags = pc->user_flags; for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; @@ -786,20 +822,22 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, + const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; int ret; - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(i915)) - return ERR_PTR(-EINVAL); - - ctx = __create_context(i915); + ctx = __create_context(i915, pc); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(i915)) { + if (pc->vm) { + /* __assign_ppgtt() requires this mutex to be held */ + mutex_lock(&ctx->mutex); + __assign_ppgtt(ctx, pc->vm); + mutex_unlock(&ctx->mutex); + } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; ppgtt = i915_ppgtt_create(&i915->gt); @@ -810,14 +848,16 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) return ERR_CAST(ppgtt); } + /* __assign_ppgtt() requires this mutex to be held */ mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); mutex_unlock(&ctx->mutex); + /* __assign_ppgtt() takes another reference for us */ i915_vm_put(&ppgtt->vm); } - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + if (pc->single_timeline) { ret = drm_syncobj_create(&ctx->syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, NULL); @@ -883,6 +923,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; u32 id; @@ -892,7 +933,14 @@ int i915_gem_context_open(struct drm_i915_private *i915, /* 0 reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) { + err = PTR_ERR(pc); + goto err; + } + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; @@ -1884,6 +1932,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_context_create_ext *args = data; + struct i915_gem_proto_context *pc; struct create_ext ext_data; int ret; u32 id; @@ -1906,7 +1955,12 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ext_data.ctx = i915_gem_create_context(i915, args->flags); + pc = proto_context_create(i915, args->flags); + if (IS_ERR(pc)) + return PTR_ERR(pc); + + ext_data.ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 5f0673a2129f..e0bdf3e298a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -66,6 +66,28 @@ struct i915_gem_engines_iter { const struct i915_gem_engines *engines; }; +/** + * struct i915_gem_proto_context - prototype context + * + * The struct i915_gem_proto_context represents the creation parameters for + * a struct i915_gem_context. 
This is used to gather parameters provided + * either through creation flags or via SET_CONTEXT_PARAM so that, when we + * create the final i915_gem_context, those parameters can be immutable. + */ +struct i915_gem_proto_context { + /** @vm: See &i915_gem_context.vm */ + struct i915_address_space *vm; + + /** @user_flags: See &i915_gem_context.user_flags */ + unsigned long user_flags; + + /** @sched: See &i915_gem_context.sched */ + struct i915_sched_attr sched; + + /** @single_timeline: See See &i915_gem_context.syncobj */ + bool single_timeline; +}; + /** * struct i915_gem_context - client state * diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 51b5a3421b40..e0f512ef7f3c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -80,11 +80,17 @@ void mock_init_contexts(struct drm_i915_private *i915) struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file) { + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; u32 id; - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; @@ -142,8 +148,14 @@ struct i915_gem_context * kernel_context(struct drm_i915_private *i915) { struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; + + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); - ctx = i915_gem_create_context(i915, 0); + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; -- cgit v1.2.3 From 07a635a825e6649f7c6dbea55e2a0557c30f1a73 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:22 -0500 Subject: drm/i915/gem: Rework error handling in default_engines Since free_engines works for partially constructed engine sets, we can use the usual goto pattern. 
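Condensed, the construction loop now unwinds through a single label instead of calling __free_engines() with a hand-computed count (a sketch of the resulting shape; the full hunk follows). The diff also folds the trailing e->num_engines++ into the max() inside the loop.

    for_each_engine(engine, gt, id) {
            ce = intel_context_create(engine);
            if (IS_ERR(ce)) {
                    err = ERR_CAST(ce);
                    goto free_engines;
            }
            /* populate e->engines[engine->legacy_idx] */
            e->num_engines = max(e->num_engines, engine->legacy_idx + 1);
    }

    return e;

free_engines:
    free_engines(e);        /* copes with a partially constructed set */
    return err;
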
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-18-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 741624da8db7..5b75f98274b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -366,7 +366,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; - struct i915_gem_engines *e; + struct i915_gem_engines *e, *err; enum intel_engine_id id; e = alloc_engines(I915_NUM_ENGINES); @@ -384,18 +384,21 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, e->num_engines + 1); - return ERR_CAST(ce); + err = ERR_CAST(ce); + goto free_engines; } intel_context_set_gem(ce, ctx); e->engines[engine->legacy_idx] = ce; - e->num_engines = max(e->num_engines, engine->legacy_idx); + e->num_engines = max(e->num_engines, engine->legacy_idx + 1); } - e->num_engines++; return e; + +free_engines: + free_engines(e); + return err; } void i915_gem_context_release(struct kref *ref) -- cgit v1.2.3 From 263ae12c3c8de253ebd33b99518297877d1892c5 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:23 -0500 Subject: drm/i915/gem: Optionally set SSEU in intel_context_set_gem For now this is a no-op because everyone passes in a null SSEU but it lets us get some of the error handling and selftest refactoring plumbed through. 
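Call sites that do not configure SSEU pass an empty struct, which keeps this a no-op; only a non-zero slice_mask, and then only on a render-class engine, triggers a reconfiguration. A minimal caller-side sketch based on the hunks below:

    struct intel_sseu null_sseu = {};
    int ret;

    ret = intel_context_set_gem(ce, ctx, null_sseu);   /* SSEU left untouched */
    if (ret) {
            err = ERR_PTR(ret);
            goto free_engines;
    }
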
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-19-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 41 ++++++++++++++++++----- drivers/gpu/drm/i915/gem/selftests/mock_context.c | 6 ++-- 2 files changed, 36 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5b75f98274b9..206721dccd24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -266,9 +266,12 @@ context_get_vm_rcu(struct i915_gem_context *ctx) } while (1); } -static void intel_context_set_gem(struct intel_context *ce, - struct i915_gem_context *ctx) +static int intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_sseu sseu) { + int ret = 0; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); @@ -295,6 +298,12 @@ static void intel_context_set_gem(struct intel_context *ce, intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); } + + /* A valid SSEU has no zero fields */ + if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) + ret = intel_context_reconfigure_sseu(ce, sseu); + + return ret; } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -362,7 +371,8 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) return e; } -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, + struct intel_sseu rcs_sseu) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; @@ -375,6 +385,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + struct intel_sseu sseu = {}; + int ret; if (engine->legacy_idx == INVALID_ENGINE) continue; @@ -388,10 +400,18 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) goto free_engines; } - intel_context_set_gem(ce, ctx); - e->engines[engine->legacy_idx] = ce; e->num_engines = max(e->num_engines, engine->legacy_idx + 1); + + if (engine->class == RENDER_CLASS) + sseu = rcs_sseu; + + ret = intel_context_set_gem(ce, ctx, sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + } return e; @@ -705,6 +725,7 @@ __create_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; + struct intel_sseu null_sseu = {}; int err; int i; @@ -722,7 +743,7 @@ __create_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->stale.engines); mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); + e = default_engines(ctx, null_sseu); if (IS_ERR(e)) { err = PTR_ERR(e); goto err_free; @@ -1508,6 +1529,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) struct intel_engine_cs *stack[16]; struct intel_engine_cs **siblings; struct intel_context *ce; + struct intel_sseu null_sseu = {}; u16 num_siblings, idx; unsigned int n; int err; @@ -1580,7 +1602,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) goto out_siblings; } - intel_context_set_gem(ce, set->ctx); + intel_context_set_gem(ce, set->ctx, null_sseu); if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { intel_context_put(ce); @@ -1688,6 +1710,7 @@ set_engines(struct 
i915_gem_context *ctx, struct drm_i915_private *i915 = ctx->i915; struct i915_context_param_engines __user *user = u64_to_user_ptr(args->value); + struct intel_sseu null_sseu = {}; struct set_engines set = { .ctx = ctx }; unsigned int num_engines, n; u64 extensions; @@ -1697,7 +1720,7 @@ set_engines(struct i915_gem_context *ctx, if (!i915_gem_context_user_engines(ctx)) return 0; - set.engines = default_engines(ctx); + set.engines = default_engines(ctx, null_sseu); if (IS_ERR(set.engines)) return PTR_ERR(set.engines); @@ -1754,7 +1777,7 @@ set_engines(struct i915_gem_context *ctx, return PTR_ERR(ce); } - intel_context_set_gem(ce, ctx); + intel_context_set_gem(ce, ctx, null_sseu); set.engines->engines[n] = ce; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index e0f512ef7f3c..cbeefd060e97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -14,6 +14,7 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; + struct intel_sseu null_sseu = {}; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -31,7 +32,7 @@ mock_context(struct drm_i915_private *i915, i915_gem_context_set_persistence(ctx); mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); + e = default_engines(ctx, null_sseu); if (IS_ERR(e)) goto err_free; RCU_INIT_POINTER(ctx->engines, e); @@ -112,6 +113,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) { struct i915_gem_engines *engines; struct i915_gem_context *ctx; + struct intel_sseu null_sseu = {}; struct intel_context *ce; engines = alloc_engines(1); @@ -130,7 +132,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) return ERR_CAST(ce); } - intel_context_set_gem(ce, ctx); + intel_context_set_gem(ce, ctx, null_sseu); engines->engines[0] = ce; engines->num_engines = 1; -- cgit v1.2.3 From bc2ceb7a08758357ed2ff1d876dc9d60bdefb080 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:24 -0500 Subject: drm/i915: Add an i915_gem_vm_lookup helper This is the VM equivalent of i915_gem_context_lookup. It's only used once in this patch but future patches will need to duplicate this lookup code so it's better to have it in a helper. 
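A typical caller, set_ppgtt() in the hunk below, pairs the lookup, which takes its reference under rcu_read_lock() via kref_get_unless_zero(), with an i915_vm_put() once that reference is no longer needed:

    struct i915_address_space *vm;

    vm = i915_gem_vm_lookup(file_priv, args->value);
    if (!vm)
            return -ENOENT;

    /* use the VM; every successful lookup must be balanced by a put */
    i915_vm_put(vm);
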
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-20-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +----- drivers/gpu/drm/i915/i915_drv.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 206721dccd24..3c59d1e4080c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1311,11 +1311,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (upper_32_bits(args->value)) return -ENOENT; - rcu_read_lock(); - vm = xa_load(&file_priv->vm_xa, args->value); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); + vm = i915_gem_vm_lookup(file_priv, args->value); if (!vm) return -ENOENT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 91a15100602d..3724bf917e0b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1861,6 +1861,20 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } +static inline struct i915_address_space * +i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = xa_load(&file_priv->vm_xa, id); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); + + return vm; +} + /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, -- cgit v1.2.3 From def25b7bc342d7eb6643408300127404608d3df2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:25 -0500 Subject: drm/i915/gem: Make an alignment check more sensible What we really want to check is that size of the engines array, i.e. args->size - sizeof(*user) is divisible by the element size, i.e. sizeof(*user->engines) because that's what's required for computing the array length right below the check. However, we're currently not doing this and instead doing a compile-time check that sizeof(*user) is divisible by sizeof(*user->engines) and avoiding the subtraction. As far as I can tell, the only reason for the more confusing pair of checks is to avoid a single subtraction of a constant. The other thing the BUILD_BUG_ON might be trying to implicitly check is that offsetof(user->engines) == sizeof(*user) and we don't have any weird padding throwing us off. However, that's not the check it's doing and it's not even a reliable way to do that check. 
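Concretely, for N engine entries userspace passes args->size == sizeof(*user) + N * sizeof(*user->engines), so the check and the length computation below operate on the same quantity:

    if (args->size < sizeof(*user) ||
        !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines)))
            return -EINVAL;

    num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
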
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-21-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 3c59d1e4080c..f135fbc97c5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1723,9 +1723,8 @@ set_engines(struct i915_gem_context *ctx, goto replace; } - BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size, sizeof(*user->engines))) { + !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) { drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", args->size); return -EINVAL; -- cgit v1.2.3 From d4433c7600f794623d6802395542cf4ca4f1b1f9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:26 -0500 Subject: drm/i915/gem: Use the proto-context to handle create parameters (v5) This means that the proto-context needs to grow support for engine configuration information as well as setparam logic. Fortunately, we'll be deleting a lot of setparam logic on the primary context shortly so it will hopefully balance out. There's an extra bit of fun here when it comes to setting SSEU and the way it interacts with PARAM_ENGINES. Unfortunately, thanks to SET_CONTEXT_PARAM and not being allowed to pick the order in which we handle certain parameters, we have think about those interactions. v2 (Daniel Vetter): - Add a proto_context_free_user_engines helper - Comment on SSEU in the commit message - Use proto_context_set_persistence in set_proto_ctx_param v3 (Daniel Vetter): - Fix a doc comment - Do an explicit HAS_FULL_PPGTT check in set_proto_ctx_vm instead of relying on pc->vm != NULL. - Handle errors for CONTEXT_PARAM_PERSISTENCE - Don't allow more resetting user engines - Rework initialization of UCONTEXT_PERSISTENCE v4 (Jason Ekstrand): - Move hand-rolled initialization of UCONTEXT_PERSISTENCE to an earlier patch v5 (Jason Ekstrand): - Move proto_context_set_persistence to this patch Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-22-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 577 +++++++++++++++++++++- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 58 +++ 2 files changed, 618 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index f135fbc97c5a..4972b8c91d94 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -193,11 +193,59 @@ static int validate_priority(struct drm_i915_private *i915, static void proto_context_close(struct i915_gem_proto_context *pc) { + int i; + if (pc->vm) i915_vm_put(pc->vm); + if (pc->user_engines) { + for (i = 0; i < pc->num_user_engines; i++) + kfree(pc->user_engines[i].siblings); + kfree(pc->user_engines); + } kfree(pc); } +static int proto_context_set_persistence(struct drm_i915_private *i915, + struct i915_gem_proto_context *pc, + bool persist) +{ + if (persist) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. 
We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915->params.enable_hangcheck) + return -EINVAL; + + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + /* + * If the cancel fails, we then need to reset, cleanly! + * + * If the per-engine reset fails, all hope is lost! We resort + * to a full GPU reset in that unlikely case, but realistically + * if the engine could not reset, the full reset does not fare + * much better. The damage has been done. + * + * However, if we cannot reset an engine by itself, we cannot + * cleanup a hanging persistent context without causing + * colateral damage, and we should not pretend we can by + * exposing the interface. + */ + if (!intel_has_reset_engine(&i915->gt)) + return -ENODEV; + + pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); + } + + return 0; +} + static struct i915_gem_proto_context * proto_context_create(struct drm_i915_private *i915, unsigned int flags) { @@ -207,6 +255,8 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) if (!pc) return ERR_PTR(-ENOMEM); + pc->num_user_engines = -1; + pc->user_engines = NULL; pc->user_flags = BIT(UCONTEXT_BANNABLE) | BIT(UCONTEXT_RECOVERABLE); if (i915->params.enable_hangcheck) @@ -228,6 +278,430 @@ proto_close: return err; } +static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct i915_address_space *vm; + + if (args->size) + return -EINVAL; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if (upper_32_bits(args->value)) + return -ENOENT; + + vm = i915_gem_vm_lookup(fpriv, args->value); + if (!vm) + return -ENOENT; + + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = vm; + + return 0; +} + +struct set_proto_ctx_engines { + struct drm_i915_private *i915; + unsigned num_engines; + struct i915_gem_proto_engine *engines; +}; + +static int +set_proto_ctx_engines_balance(struct i915_user_extension __user *base, + void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct intel_engine_cs **siblings; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(i915)) + return -ENODEV; + + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + return -ENODEV; /* not implement yet */ + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, + "Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + if (num_siblings == 0) + return 0; + + siblings = kmalloc_array(num_siblings, sizeof(*siblings), GFP_KERNEL); + if (!siblings) + return -ENOMEM; + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = 
-EFAULT; + goto err_siblings; + } + + siblings[n] = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + drm_dbg(&i915->drm, + "Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto err_siblings; + } + } + + if (num_siblings == 1) { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set->engines[idx].engine = siblings[0]; + kfree(siblings); + } else { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_BALANCED; + set->engines[idx].num_siblings = num_siblings; + set->engines[idx].siblings = siblings; + } + + return 0; + +err_siblings: + kfree(siblings); + + return err; +} + +static int +set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance ci; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, + "Invalid index for virtual engine: %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type == I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); + return -EINVAL; + } + + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_PHYSICAL) { + drm_dbg(&i915->drm, + "Bonding with virtual engines not allowed\n"); + return -EINVAL; + } + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if (copy_from_user(&ci, &ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!master) { + drm_dbg(&i915->drm, + "Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + drm_dbg(&i915->drm, + "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + } + + return 0; +} + +static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, +}; + +static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct set_proto_ctx_engines set = { .i915 = i915 }; + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + unsigned int n; + u64 extensions; + int err; + + if (pc->num_user_engines >= 0) { + drm_dbg(&i915->drm, "Cannot set engines twice"); + return -EINVAL; + } + + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) { + drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", + 
args->size); + return -EINVAL; + } + + set.num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + /* RING_MASK has no shift so we can use it directly here */ + if (set.num_engines > I915_EXEC_RING_MASK + 1) + return -EINVAL; + + set.engines = kmalloc_array(set.num_engines, sizeof(*set.engines), GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + for (n = 0; n < set.num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + kfree(set.engines); + return -EFAULT; + } + + memset(&set.engines[n], 0, sizeof(set.engines[n])); + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) + continue; + + engine = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + drm_dbg(&i915->drm, + "Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + kfree(set.engines); + return -ENOENT; + } + + set.engines[n].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set.engines[n].engine = engine; + } + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_proto_ctx_engines_extensions, + ARRAY_SIZE(set_proto_ctx_engines_extensions), + &set); + if (err) { + kfree(set.engines); + return err; + } + + pc->num_user_engines = set.num_engines; + pc->user_engines = set.engines; + + return 0; +} + +static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_sseu *sseu; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (GRAPHICS_VER(i915) != 11) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + if (!!(user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) != (pc->num_user_engines >= 0)) + return -EINVAL; + + if (pc->num_user_engines >= 0) { + int idx = user_sseu.engine.engine_instance; + struct i915_gem_proto_engine *pe; + + if (idx >= pc->num_user_engines) + return -EINVAL; + + pe = &pc->user_engines[idx]; + + /* Only render engine supports RPCS configuration. */ + if (pe->engine->class != RENDER_CLASS) + return -EINVAL; + + sseu = &pe->sseu; + } else { + /* Only render engine supports RPCS configuration. 
*/ + if (user_sseu.engine.engine_class != I915_ENGINE_CLASS_RENDER) + return -EINVAL; + + /* There is only one render engine */ + if (user_sseu.engine.engine_instance != 0) + return -EINVAL; + + sseu = &pc->legacy_rcs_sseu; + } + + ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu); + if (ret) + return ret; + + args->size = sizeof(user_sseu); + + return 0; +} + +static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + int ret = 0; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_NO_ERROR_CAPTURE); + else + pc->user_flags &= ~BIT(UCONTEXT_NO_ERROR_CAPTURE); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + if (args->size) + ret = -EINVAL; + else if (!capable(CAP_SYS_ADMIN) && !args->value) + ret = -EPERM; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_BANNABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_RECOVERABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + ret = validate_priority(fpriv->dev_priv, args); + if (!ret) + pc->sched.priority = args->value; + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = set_proto_ctx_sseu(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = set_proto_ctx_vm(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = set_proto_ctx_engines(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_PERSISTENCE: + if (args->size) + ret = -EINVAL; + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); + break; + + case I915_CONTEXT_PARAM_NO_ZEROMAP: + case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + default: + ret = -EINVAL; + break; + } + + return ret; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -421,6 +895,56 @@ free_engines: return err; } +static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, + unsigned int num_engines, + struct i915_gem_proto_engine *pe) +{ + struct i915_gem_engines *e, *err; + unsigned int n; + + e = alloc_engines(num_engines); + for (n = 0; n < num_engines; n++) { + struct intel_context *ce; + int ret; + + switch (pe[n].type) { + case I915_GEM_ENGINE_TYPE_PHYSICAL: + ce = intel_context_create(pe[n].engine); + break; + + case I915_GEM_ENGINE_TYPE_BALANCED: + ce = intel_execlists_create_virtual(pe[n].siblings, + pe[n].num_siblings); + break; + + case I915_GEM_ENGINE_TYPE_INVALID: + default: + GEM_WARN_ON(pe[n].type != I915_GEM_ENGINE_TYPE_INVALID); + continue; + } + + if (IS_ERR(ce)) { + err = ERR_CAST(ce); + goto free_engines; + } + + e->engines[n] = ce; + + ret = intel_context_set_gem(ce, ctx, pe->sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + } + e->num_engines = num_engines; + + return e; + +free_engines: + free_engines(e); + return err; +} + void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); @@ -725,7 +1249,6 @@ __create_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - struct intel_sseu null_sseu = {}; int err; int i; @@ -743,7 +1266,7 @@ __create_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->stale.engines); 
mutex_init(&ctx->engines_mutex); - e = default_engines(ctx, null_sseu); + e = default_engines(ctx, pc->legacy_rcs_sseu); if (IS_ERR(e)) { err = PTR_ERR(e); goto err_free; @@ -881,6 +1404,24 @@ i915_gem_create_context(struct drm_i915_private *i915, i915_vm_put(&ppgtt->vm); } + if (pc->num_user_engines >= 0) { + struct i915_gem_engines *engines; + + engines = user_engines(ctx, pc->num_user_engines, + pc->user_engines); + if (IS_ERR(engines)) { + context_close(ctx); + return ERR_CAST(engines); + } + + mutex_lock(&ctx->engines_mutex); + i915_gem_context_set_user_engines(ctx); + engines = rcu_replace_pointer(ctx->engines, engines, 1); + mutex_unlock(&ctx->engines_mutex); + + free_engines(engines); + } + if (pc->single_timeline) { ret = drm_syncobj_create(&ctx->syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, @@ -1915,7 +2456,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, } struct create_ext { - struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; struct drm_i915_file_private *fpriv; }; @@ -1930,7 +2471,7 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) if (local.param.ctx_id) return -EINVAL; - return ctx_setparam(arg->fpriv, arg->ctx, &local.param); + return set_proto_ctx_param(arg->fpriv, arg->pc, &local.param); } static int invalid_ext(struct i915_user_extension __user *ext, void *data) @@ -1953,7 +2494,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_context_create_ext *args = data; - struct i915_gem_proto_context *pc; + struct i915_gem_context *ctx; struct create_ext ext_data; int ret; u32 id; @@ -1976,25 +2517,27 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - pc = proto_context_create(i915, args->flags); - if (IS_ERR(pc)) - return PTR_ERR(pc); - - ext_data.ctx = i915_gem_create_context(i915, pc); - proto_context_close(pc); - if (IS_ERR(ext_data.ctx)) - return PTR_ERR(ext_data.ctx); + ext_data.pc = proto_context_create(i915, args->flags); + if (IS_ERR(ext_data.pc)) + return PTR_ERR(ext_data.pc); if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), &ext_data); - if (ret) - goto err_ctx; + if (ret) { + proto_context_close(ext_data.pc); + return ret; + } } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); + ctx = i915_gem_create_context(i915, ext_data.pc); + proto_context_close(ext_data.pc); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = gem_context_register(ctx, ext_data.fpriv, &id); if (ret < 0) goto err_ctx; @@ -2004,7 +2547,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return 0; err_ctx: - context_close(ext_data.ctx); + context_close(ctx); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index e0bdf3e298a6..a1a117d9ed97 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -66,6 +66,55 @@ struct i915_gem_engines_iter { const struct i915_gem_engines *engines; }; +/** + * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine + */ +enum i915_gem_engine_type { + /** @I915_GEM_ENGINE_TYPE_INVALID: An invalid engine */ + I915_GEM_ENGINE_TYPE_INVALID = 0, + + /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */ + I915_GEM_ENGINE_TYPE_PHYSICAL, + + /** 
@I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ + I915_GEM_ENGINE_TYPE_BALANCED, +}; + +/** + * struct i915_gem_proto_engine - prototype engine + * + * This struct describes an engine that a context may contain. Engines + * have three types: + * + * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they + * show up as a NULL in i915_gem_engines::engines[i] and any attempt to + * use them by the user results in -EINVAL. They are also useful during + * proto-context construction because the client may create invalid + * engines and then set them up later as virtual engines. + * + * - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by + * i915_gem_proto_engine::engine. + * + * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described + * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. + */ +struct i915_gem_proto_engine { + /** @type: Type of this engine */ + enum i915_gem_engine_type type; + + /** @engine: Engine, for physical */ + struct intel_engine_cs *engine; + + /** @num_siblings: Number of balanced siblings */ + unsigned int num_siblings; + + /** @siblings: Balanced siblings */ + struct intel_engine_cs **siblings; + + /** @sseu: Client-set SSEU parameters */ + struct intel_sseu sseu; +}; + /** * struct i915_gem_proto_context - prototype context * @@ -84,6 +133,15 @@ struct i915_gem_proto_context { /** @sched: See &i915_gem_context.sched */ struct i915_sched_attr sched; + /** @num_user_engines: Number of user-specified engines or -1 */ + int num_user_engines; + + /** @user_engines: User-specified engines */ + struct i915_gem_proto_engine *user_engines; + + /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */ + struct intel_sseu legacy_rcs_sseu; + /** @single_timeline: See See &i915_gem_context.syncobj */ bool single_timeline; }; -- cgit v1.2.3 From 046d1660daee31e49b20df538ca9e2c22bd2bfb3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:27 -0500 Subject: drm/i915/gem: Return an error ptr from context_lookup We're about to start doing lazy context creation which means contexts get created in i915_gem_context_lookup and we may start having more errors than -ENOENT. 
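Call sites move from a NULL check to the usual ERR_PTR convention, so whatever error the upcoming lazy-creation path produces is propagated as-is; each converted caller ends up looking like this:

    ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
    if (IS_ERR(ctx))
            return PTR_ERR(ctx);
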
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-23-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 12 ++++++------ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_perf.c | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4972b8c91d94..7045e3afa711 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2636,8 +2636,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); switch (args->param) { case I915_CONTEXT_PARAM_GTT_SIZE: @@ -2705,8 +2705,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int ret; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = ctx_setparam(file_priv, ctx, args); @@ -2725,8 +2725,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return -EINVAL; ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); /* * We opt for unserialised reads here. This may result in tearing diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 70e19a22a51a..cb493f1fcbe7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -739,8 +739,8 @@ static int eb_select_context(struct i915_execbuffer *eb) struct i915_gem_context *ctx; ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); - if (unlikely(!ctx)) - return -ENOENT; + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); eb->gem_context = ctx; if (rcu_access_pointer(ctx->vm)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3724bf917e0b..be6398318072 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1858,7 +1858,7 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) ctx = NULL; rcu_read_unlock(); - return ctx; + return ctx ? ctx : ERR_PTR(-ENOENT); } static inline struct i915_address_space * diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 9f94914958c3..b4ec114a4698 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3414,10 +3414,10 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_file_private *file_priv = file->driver_priv; specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); - if (!specific_ctx) { + if (IS_ERR(specific_ctx)) { DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", ctx_handle); - ret = -ENOENT; + ret = PTR_ERR(specific_ctx); goto err; } } -- cgit v1.2.3 From 8579d37eff4d3ec2987128b156a90bebc3ab5aca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:28 -0500 Subject: drm/i915/gt: Drop i915_address_space::file (v2) There's a big comment saying how useful it is but no one is using this for anything anymore. 
It was added in 2bfa996e031b ("drm/i915: Store owning file on the i915_address_space") and used for debugfs at the time as well as telling the difference between the global GTT and a PPGTT. In f6e8aa387171 ("drm/i915: Report the number of closed vma held by each context in debugfs") we removed one use of it by switching to a context walk and comparing with the VM in the context. Finally, VM stats for debugfs were entirely nuked in db80a1294c23 ("drm/i915/gem: Remove per-client stats from debugfs/i915_gem_objects") v2 (Daniel Vetter): - Delete a struct drm_i915_file_private pre-declaration - Add a comment to the commit message about history Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-24-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 9 --------- drivers/gpu/drm/i915/gt/intel_gtt.h | 11 ----------- drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 - 3 files changed, 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7045e3afa711..5a1402544d48 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1453,17 +1453,10 @@ static int gem_context_register(struct i915_gem_context *ctx, u32 *id) { struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); - if (vm) - WRITE_ONCE(vm->file, fpriv); /* XXX */ - mutex_unlock(&ctx->mutex); - ctx->pid = get_task_pid(current, PIDTYPE_PID); snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); @@ -1562,8 +1555,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); - ppgtt->vm.file = file_priv; - if (args->extensions) { err = i915_user_extensions(u64_to_user_ptr(args->extensions), NULL, 0, diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 9bd89f2a01ff..bc7153018ebd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -140,7 +140,6 @@ typedef u64 gen8_pte_t; enum i915_cache_level; -struct drm_i915_file_private; struct drm_i915_gem_object; struct i915_fence_reg; struct i915_vma; @@ -220,16 +219,6 @@ struct i915_address_space { struct intel_gt *gt; struct drm_i915_private *i915; struct device *dma; - /* - * Every address space belongs to a struct file - except for the global - * GTT that is owned by the driver (and so @file is set to NULL). In - * principle, no information should leak from one context to another - * (or between files/processes etc) unless explicitly shared by the - * owner. Tracking the owner is important in order to free up per-file - * objects along with the file, to aide resource tracking, and to - * assign blame. - */ - struct drm_i915_file_private *file; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ u64 reserved; /* size addr space reserved */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 5c7ae40bba63..cc047ec594f9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -73,7 +73,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); - ppgtt->vm.file = ERR_PTR(-ENODEV); ppgtt->vm.dma = i915->drm.dev; i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); -- cgit v1.2.3 From a4c1cdd34e2cda620c9749ae6adec49b4b011d47 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:29 -0500 Subject: drm/i915/gem: Delay context creation (v3) The current context uAPI allows for two methods of setting context parameters: SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called at any time while the later happens as part of GEM_CONTEXT_CREATE. Currently, everything settable via one is settable via the other. While some params are fairly simple and setting them on a live context is harmless such as the context priority, others are far trickier such as the VM or the set of engines. In order to swap out the VM, for instance, we have to delay until all current in-flight work is complete, swap in the new VM, and then continue. This leads to a plethora of potential race conditions we'd really rather avoid. In previous patches, we added a i915_gem_proto_context struct which is capable of storing and tracking all such create parameters. This commit delays the creation of the actual context until after the client is done configuring it with SET_CONTEXT_PARAM. From the perspective of the client, it has the same u32 context ID the whole time. From the perspective of i915, however, it's an i915_gem_proto_context right up until the point where we attempt to do something which the proto-context can't handle. Then the real context gets created. This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE is called, we create a proto-context, reserve a slot in context_xa but leave it NULL, the proto-context in the corresponding slot in proto_context_xa. Then, whenever we go to look up a context, we first check context_xa. If it's there, we return the i915_gem_context and we're done. If it's not, we look in proto_context_xa and, if we find it there, we create the actual context and kill the proto-context. In order for this dance to work properly, everything which ever touches a proto-context is guarded by drm_i915_file_private::proto_context_lock, including context creation. Yes, this means context creation now takes a giant global lock but it can't really be helped and that should never be on any driver's fast-path anyway. v2 (Daniel Vetter): - Commit message grammatical fixes. 
- Use WARN_ON instead of GEM_BUG_ON - Rename lazy_create_context_locked to finalize_create_context_locked - Rework the control-flow logic in the setparam ioctl - Better documentation all around v3 (kernel test robot): - Make finalize_create_context_locked static Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-25-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 203 +++++++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_context.h | 3 + drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 54 ++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 76 ++++++-- 5 files changed, 283 insertions(+), 58 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5a1402544d48..c4f89e4b1665 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -278,6 +278,42 @@ proto_close: return err; } +static int proto_context_register_locked(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + void *old; + + lockdep_assert_held(&fpriv->proto_context_lock); + + ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL); + if (xa_is_err(old)) { + xa_erase(&fpriv->context_xa, *id); + return xa_err(old); + } + WARN_ON(old); + + return 0; +} + +static int proto_context_register(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + + mutex_lock(&fpriv->proto_context_lock); + ret = proto_context_register_locked(fpriv, pc, id); + mutex_unlock(&fpriv->proto_context_lock); + + return ret; +} + static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, const struct drm_i915_gem_context_param *args) @@ -1448,12 +1484,12 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv, - u32 *id) +static void gem_context_register(struct i915_gem_context *ctx, + struct drm_i915_file_private *fpriv, + u32 id) { struct drm_i915_private *i915 = ctx->i915; - int ret; + void *old; ctx->file_priv = fpriv; @@ -1462,19 +1498,12 @@ static int gem_context_register(struct i915_gem_context *ctx, current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); - if (ret) - goto err_pid; + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); - - return 0; - -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); - return ret; } int i915_gem_context_open(struct drm_i915_private *i915, @@ -1484,9 +1513,12 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; - u32 id; - xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); + mutex_init(&file_priv->proto_context_lock); + xa_init_flags(&file_priv->proto_context_xa, XA_FLAGS_ALLOC); + + /* 0 reserved for the default context */ + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC1); /* 0 
reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); @@ -1504,28 +1536,31 @@ int i915_gem_context_open(struct drm_i915_private *i915, goto err; } - err = gem_context_register(ctx, file_priv, &id); - if (err < 0) - goto err_ctx; + gem_context_register(ctx, file_priv, 0); - GEM_BUG_ON(id); return 0; -err_ctx: - context_close(ctx); err: xa_destroy(&file_priv->vm_xa); xa_destroy(&file_priv->context_xa); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); return err; } void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx; + xa_for_each(&file_priv->proto_context_xa, idx, pc) + proto_context_close(pc); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) context_close(ctx); xa_destroy(&file_priv->context_xa); @@ -2480,12 +2515,73 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; } +static inline struct i915_gem_context * +__context_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_gem_context *ctx; + + rcu_read_lock(); + ctx = xa_load(&file_priv->context_xa, id); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + + return ctx; +} + +static struct i915_gem_context * +finalize_create_context_locked(struct drm_i915_file_private *file_priv, + struct i915_gem_proto_context *pc, u32 id) +{ + struct i915_gem_context *ctx; + void *old; + + lockdep_assert_held(&file_priv->proto_context_lock); + + ctx = i915_gem_create_context(file_priv->dev_priv, pc); + if (IS_ERR(ctx)) + return ctx; + + gem_context_register(ctx, file_priv, id); + + old = xa_erase(&file_priv->proto_context_xa, id); + GEM_BUG_ON(old != pc); + proto_context_close(pc); + + /* One for the xarray and one for the caller */ + return i915_gem_context_get(ctx); +} + +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_gem_proto_context *pc; + struct i915_gem_context *ctx; + + ctx = __context_lookup(file_priv, id); + if (ctx) + return ctx; + + mutex_lock(&file_priv->proto_context_lock); + /* Try one more time under the lock */ + ctx = __context_lookup(file_priv, id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, id); + if (!pc) + ctx = ERR_PTR(-ENOENT); + else + ctx = finalize_create_context_locked(file_priv, pc, id); + } + mutex_unlock(&file_priv->proto_context_lock); + + return ctx; +} + int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_context_create_ext *args = data; - struct i915_gem_context *ctx; struct create_ext ext_data; int ret; u32 id; @@ -2517,28 +2613,21 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, create_extensions, ARRAY_SIZE(create_extensions), &ext_data); - if (ret) { - proto_context_close(ext_data.pc); - return ret; - } + if (ret) + goto err_pc; } - ctx = i915_gem_create_context(i915, ext_data.pc); - proto_context_close(ext_data.pc); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = gem_context_register(ctx, ext_data.fpriv, &id); + ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); if (ret < 0) - goto err_ctx; + goto 
err_pc; args->ctx_id = id; drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; -err_ctx: - context_close(ctx); +err_pc: + proto_context_close(ext_data.pc); return ret; } @@ -2547,6 +2636,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_context_destroy *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; if (args->pad != 0) @@ -2555,11 +2645,24 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; + /* We need to hold the proto-context lock here to prevent races + * with finalize_create_context_locked(). + */ + mutex_lock(&file_priv->proto_context_lock); ctx = xa_erase(&file_priv->context_xa, args->ctx_id); - if (!ctx) + pc = xa_erase(&file_priv->proto_context_xa, args->ctx_id); + mutex_unlock(&file_priv->proto_context_lock); + + if (!ctx && !pc) return -ENOENT; + GEM_WARN_ON(ctx && pc); + + if (pc) + proto_context_close(pc); + + if (ctx) + context_close(ctx); - context_close(ctx); return 0; } @@ -2692,16 +2795,30 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; - int ret; + int ret = 0; - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + mutex_lock(&file_priv->proto_context_lock); + ctx = __context_lookup(file_priv, args->ctx_id); + if (!ctx) { + /* FIXME: We should consider disallowing SET_CONTEXT_PARAM + * for most things on future platforms. Clients should be + * using CONTEXT_CREATE_EXT_PARAM instead. + */ + pc = xa_load(&file_priv->proto_context_xa, args->ctx_id); + if (pc) + ret = set_proto_ctx_param(file_priv, pc, args); + else + ret = -ENOENT; + } + mutex_unlock(&file_priv->proto_context_lock); - ret = ctx_setparam(file_priv, ctx, args); + if (ctx) { + ret = ctx_setparam(file_priv, ctx, args); + i915_gem_context_put(ctx); + } - i915_gem_context_put(ctx); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index b5c908f3f4f2..20411db84914 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -133,6 +133,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index a1a117d9ed97..94c03a97cb77 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -122,6 +122,60 @@ struct i915_gem_proto_engine { * a struct i915_gem_context. This is used to gather parameters provided * either through creation flags or via SET_CONTEXT_PARAM so that, when we * create the final i915_gem_context, those parameters can be immutable. + * + * The context uAPI allows for two methods of setting context parameters: + * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. 
The former is + * allowed to be called at any time while the later happens as part of + * GEM_CONTEXT_CREATE. When these were initially added, Currently, + * everything settable via one is settable via the other. While some + * params are fairly simple and setting them on a live context is harmless + * such the context priority, others are far trickier such as the VM or the + * set of engines. To avoid some truly nasty race conditions, we don't + * allow setting the VM or the set of engines on live contexts. + * + * The way we dealt with this without breaking older userspace that sets + * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of + * the actual context until after the client is done configuring it with + * SET_CONTEXT_PARAM. From the perspective of the client, it has the same + * u32 context ID the whole time. From the perspective of i915, however, + * it's an i915_gem_proto_context right up until the point where we attempt + * to do something which the proto-context can't handle at which point the + * real context gets created. + * + * This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE + * is called, we create a proto-context, reserve a slot in context_xa but + * leave it NULL, the proto-context in the corresponding slot in + * proto_context_xa. Then, whenever we go to look up a context, we first + * check context_xa. If it's there, we return the i915_gem_context and + * we're done. If it's not, we look in proto_context_xa and, if we find it + * there, we create the actual context and kill the proto-context. + * + * At the time we made this change (April, 2021), we did a fairly complete + * audit of existing userspace to ensure this wouldn't break anything: + * + * - Mesa/i965 didn't use the engines or VM APIs at all + * + * - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and + * didn't use the VM API. + * + * - Mesa/iris didn't use the engines or VM APIs at all + * + * - The open-source compute-runtime didn't yet use the engines API but + * did use the VM API via SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM + * was always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE. + * + * - The media driver sets engines and bonding/balancing via + * SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM to set the VM was + * always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE and setting engines immediately followed that. + * + * In order for this dance to work properly, any modification to an + * i915_gem_proto_context that is exposed to the client via + * drm_i915_file_private::proto_context_xa must be guarded by + * drm_i915_file_private::proto_context_lock. The exception is when a + * proto-context has not yet been exposed such as when handling + * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. 
*/ struct i915_gem_proto_context { /** @vm: See &i915_gem_context.vm */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index cbeefd060e97..61aaac4a334c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -81,6 +81,7 @@ void mock_init_contexts(struct drm_i915_private *i915) struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file) { + struct drm_i915_file_private *fpriv = to_drm_file(file)->driver_priv; struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; @@ -97,10 +98,12 @@ live_context(struct drm_i915_private *i915, struct file *file) i915_gem_context_set_no_error_capture(ctx); - err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); + err = xa_alloc(&fpriv->context_xa, &id, NULL, xa_limit_32b, GFP_KERNEL); if (err < 0) goto err_ctx; + gem_context_register(ctx, fpriv, id); + return ctx; err_ctx: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index be6398318072..9254ba39da9f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -202,6 +202,68 @@ struct drm_i915_file_private { struct rcu_head rcu; }; + /** @proto_context_lock: Guards all struct i915_gem_proto_context + * operations + * + * This not only guards @proto_context_xa, but is always held + * whenever we manipulate any struct i915_gem_proto_context, + * including finalizing it on first actual use of the GEM context. + * + * See i915_gem_proto_context. + */ + struct mutex proto_context_lock; + + /** @proto_context_xa: xarray of struct i915_gem_proto_context + * + * Historically, the context uAPI allowed for two methods of + * setting context parameters: SET_CONTEXT_PARAM and + * CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called + * at any time while the later happens as part of + * GEM_CONTEXT_CREATE. Everything settable via one was settable + * via the other. While some params are fairly simple and setting + * them on a live context is harmless such as the context priority, + * others are far trickier such as the VM or the set of engines. + * In order to swap out the VM, for instance, we have to delay + * until all current in-flight work is complete, swap in the new + * VM, and then continue. This leads to a plethora of potential + * race conditions we'd really rather avoid. + * + * We have since disallowed setting these more complex parameters + * on active contexts. This works by delaying the creation of the + * actual context until after the client is done configuring it + * with SET_CONTEXT_PARAM. From the perspective of the client, it + * has the same u32 context ID the whole time. From the + * perspective of i915, however, it's a struct i915_gem_proto_context + * right up until the point where we attempt to do something which + * the proto-context can't handle. Then the struct i915_gem_context + * gets created. + * + * This is accomplished via a little xarray dance. When + * GEM_CONTEXT_CREATE is called, we create a struct + * i915_gem_proto_context, reserve a slot in @context_xa but leave + * it NULL, and place the proto-context in the corresponding slot + * in @proto_context_xa. Then, in i915_gem_context_lookup(), we + * first check @context_xa. If it's there, we return the struct + * i915_gem_context and we're done. 
If it's not, we look in + * @proto_context_xa and, if we find it there, we create the actual + * context and kill the proto-context. + * + * In order for this dance to work properly, everything which ever + * touches a struct i915_gem_proto_context is guarded by + * @proto_context_lock, including context creation. Yes, this + * means context creation now takes a giant global lock but it + * can't really be helped and that should never be on any driver's + * fast-path anyway. + */ + struct xarray proto_context_xa; + + /** @context_xa: xarray of fully created i915_gem_context + * + * Write access to this xarray is guarded by @proto_context_lock. + * Otherwise, writers may race with finalize_create_context_locked(). + * + * See @proto_context_xa. + */ struct xarray context_xa; struct xarray vm_xa; @@ -1847,20 +1909,6 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags); -static inline struct i915_gem_context * -i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) -{ - struct i915_gem_context *ctx; - - rcu_read_lock(); - ctx = xa_load(&file_priv->context_xa, id); - if (ctx && !kref_get_unless_zero(&ctx->ref)) - ctx = NULL; - rcu_read_unlock(); - - return ctx ? ctx : ERR_PTR(-ENOENT); -} - static inline struct i915_address_space * i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { -- cgit v1.2.3 From ccbc1b97948ab671335e950271e39766729736c3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:30 -0500 Subject: drm/i915/gem: Don't allow changing the VM on running contexts (v4) When the APIs were added to manage VMs more directly from userspace, the questionable choice was made to allow changing out the VM on a context at any time. This is horribly racy and there's absolutely no reason why any userspace would want to do this outside of testing that exact race. By removing support for CONTEXT_PARAM_VM from ctx_setparam, we make it impossible to change out the VM after the context has been fully created. This lets us delete a bunch of deferred task code as well as a duplicated (and slightly different) copy of the code which programs the PPGTT registers. 
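For illustration only (not part of this patch, and with error handling and the VM-create step omitted), the userspace-visible consequence is that the VM must now be chosen at context creation through CONTEXT_CREATE_EXT_SETPARAM rather than a later SET_CONTEXT_PARAM; a minimal sketch using the existing uAPI structures:

    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    /* vm_id is assumed to come from DRM_IOCTL_I915_GEM_VM_CREATE. */
    struct drm_i915_gem_context_create_ext_setparam vm_ext = {
            .base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
            .param = {
                    .param = I915_CONTEXT_PARAM_VM,
                    .value = vm_id,
            },
    };
    struct drm_i915_gem_context_create_ext create = {
            .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
            .extensions = (__u64)(uintptr_t)&vm_ext,
    };

    drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
    /* create.ctx_id now refers to a context permanently bound to vm_id. */
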
v2 (Jason Ekstrand): - Expand the commit message v3 (Daniel Vetter): - Don't drop the __rcu on the vm pointer v4 (Jason Ekstrand): - Make it more obvious that I915_CONTEXT_PARAM_VM returns -EINVAL Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-26-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 263 +-------------------- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 119 ---------- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 - 3 files changed, 1 insertion(+), 382 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c4f89e4b1665..40acecfbbe5b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1633,120 +1633,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - bool (*skip)(struct intel_context *ce, void *data), - int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct i915_gem_ww_ctx ww; - struct intel_context *ce; - int err = 0; - - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(&cb->base, NULL, cb_retire, 0); - err = i915_active_acquire(&cb->base); - if (err) { - kfree(cb); - return err; - } - - e = __context_engines_await(ctx, NULL); - if (!e) { - i915_active_release(&cb->base); - return -ENOENT; - } - - for_each_gem_engine(ce, e, it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines)) - continue; - - if (skip && skip(ce, data)) - continue; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = intel_context_pin_ww(ce, &ww); - if (err) - goto err; - - if (pin) - err = pin(ce, &ww, data); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_add_request(&cb->base, rq); - - i915_request_add(rq); -err_unpin: - intel_context_unpin(ce); -err: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (err) - break; - } - i915_sw_fence_complete(&e->fence); - - cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1779,150 +1665,6 @@ err_put: return err; } -static void set_ppgtt_barrier(void *data) -{ - struct i915_address_space *old = data; - - if (GRAPHICS_VER(old->i915) < 8) - gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - - i915_vm_close(old); -} - -static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data) -{ - struct i915_address_space *vm = ce->vm; - - if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) - /* ppGTT is not part of the legacy context image */ - return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); - - return 0; -} - -static int emit_ppgtt_update(struct i915_request *rq, void *data) -{ - struct i915_address_space *vm = rq->context->vm; - struct intel_engine_cs *engine = rq->engine; - u32 base = engine->mmio_base; - u32 *cs; - int i; - - if (i915_vm_is_4lvl(vm)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - const dma_addr_t pd_daddr = px_dma(ppgtt->pd); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - *cs++ = lower_32_bits(pd_daddr); - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } - - return 0; -} - -static bool skip_ppgtt_update(struct intel_context *ce, void *data) -{ - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); -} - -static int set_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_address_space *vm, *old; - int err; - - if (args->size) - return -EINVAL; - - if (!rcu_access_pointer(ctx->vm)) - return -ENODEV; - - if (upper_32_bits(args->value)) - return -ENOENT; - - vm = i915_gem_vm_lookup(file_priv, args->value); - if (!vm) - return -ENOENT; - - err = mutex_lock_interruptible(&ctx->mutex); - if (err) - goto out; - - if (i915_gem_context_is_closed(ctx)) { - err = -ENOENT; - goto unlock; - } - - if (vm == rcu_access_pointer(ctx->vm)) - goto unlock; - - old = __set_ppgtt(ctx, vm); - - /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - lut_close(ctx); - - /* - * We need to flush any requests using the current ppgtt before - * we release it as the requests do not hold a reference themselves, - * only indirectly through the context. 
- */ - err = context_barrier_task(ctx, ALL_ENGINES, - skip_ppgtt_update, - pin_ppgtt_update, - emit_ppgtt_update, - set_ppgtt_barrier, - old); - if (err) { - i915_vm_close(__set_ppgtt(ctx, old)); - i915_vm_close(old); - lut_close(ctx); /* force a rebuild of the old obj:vma cache */ - } - -unlock: - mutex_unlock(&ctx->mutex); -out: - i915_vm_put(vm); - return err; -} - int i915_gem_user_to_context_sseu(struct intel_gt *gt, const struct drm_i915_gem_context_param_sseu *user, @@ -2458,10 +2200,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_sseu(ctx, args); break; - case I915_CONTEXT_PARAM_VM: - ret = set_ppgtt(fpriv, ctx, args); - break; - case I915_CONTEXT_PARAM_ENGINES: ret = set_engines(ctx, args); break; @@ -2473,6 +2211,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_VM: default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index dbcfa28a9d91..92544a174cc9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1875,125 +1875,6 @@ out_file: return err; } -static bool skip_unused_engines(struct intel_context *ce, void *data) -{ - return !ce->state; -} - -static void mock_barrier_task(void *data) -{ - unsigned int *counter = data; - - ++*counter; -} - -static int mock_context_barrier(void *arg) -{ -#undef pr_fmt -#define pr_fmt(x) "context_barrier_task():" # x - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct i915_request *rq; - unsigned int counter; - int err; - - /* - * The context barrier provides us with a callback after it emits - * a request; useful for retiring old state after loading new. 
- */ - - ctx = mock_context(i915, "mock"); - if (!ctx) - return -ENOMEM; - - counter = 0; - err = context_barrier_task(ctx, 0, NULL, NULL, NULL, - mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately with 0 engines\n"); - err = -EINVAL; - goto out; - } - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately for all unused engines\n"); - err = -EINVAL; - goto out; - } - - rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]); - if (IS_ERR(rq)) { - pr_err("Request allocation failed!\n"); - goto out; - } - i915_request_add(rq); - - counter = 0; - context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, - mock_barrier_task, &counter); - context_barrier_inject_fault = 0; - if (err == -ENXIO) - err = 0; - else - pr_err("Did not hit fault injection!\n"); - if (counter != 0) { - pr_err("Invoked callback on error!\n"); - err = -EIO; - } - if (err) - goto out; - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - mock_device_flush(i915); - if (counter == 0) { - pr_err("Did not retire on each active engines\n"); - err = -EINVAL; - goto out; - } - -out: - mock_context_close(ctx); - return err; -#undef pr_fmt -#define pr_fmt(x) x -} - -int i915_gem_context_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_context_barrier), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - mock_destroy_device(i915); - return err; -} - int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 6759b56086fb..793fb28a770d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -32,6 +32,5 @@ selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) -selftest(contexts, i915_gem_context_mock_selftests) selftest(memory_region, intel_memory_region_mock_selftests) selftest(buddy, i915_buddy_mock_selftests) -- cgit v1.2.3 From d9d29c747df844e946ecd6089893be80c5d32403 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:31 -0500 Subject: drm/i915/gem: Don't allow changing the engine set on running contexts (v3) When the APIs were added to manage the engine set on a GEM context directly from userspace, the questionable choice was made to allow changing the engine set on a context at any time. This is horribly racy and there's absolutely no reason why any userspace would want to do this outside of trying to exercise interesting race conditions. By removing support for CONTEXT_PARAM_ENGINES from ctx_setparam, we make it impossible to change the engine set after the context has been fully created. 
This doesn't yet let us delete all the deferred engine clean-up code as that's still used for handling the case where the client dies or calls GEM_CONTEXT_DESTROY while work is in flight. However, moving to an API where the engine set is effectively immutable gives us more options to potentially clean that code up a bit going forward. It also removes a whole class of ways in which a client can hurt itself or try to get around kernel context banning. v2 (Jason Ekstrand): - Expand the commit mesage v3 (Jason Ekstrand): - Make it more obvious that I915_CONTEXT_PARAM_ENGINES returns -EINVAL Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-27-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 304 +--------------------------- 1 file changed, 1 insertion(+), 303 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 40acecfbbe5b..5f5375b15c53 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1819,305 +1819,6 @@ out_ce: return ret; } -struct set_engines { - struct i915_gem_context *ctx; - struct i915_gem_engines *engines; -}; - -static int -set_engines__load_balance(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_load_balance __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct intel_engine_cs *stack[16]; - struct intel_engine_cs **siblings; - struct intel_context *ce; - struct intel_sseu null_sseu = {}; - u16 num_siblings, idx; - unsigned int n; - int err; - - if (!HAS_EXECLISTS(i915)) - return -ENODEV; - - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - return -ENODEV; /* not implement yet */ - - if (get_user(idx, &ext->engine_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (set->engines->engines[idx]) { - drm_dbg(&i915->drm, - "Invalid placement[%d], already occupied\n", idx); - return -EEXIST; - } - - if (get_user(num_siblings, &ext->num_siblings)) - return -EFAULT; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - err = check_user_mbz(&ext->mbz64); - if (err) - return err; - - siblings = stack; - if (num_siblings > ARRAY_SIZE(stack)) { - siblings = kmalloc_array(num_siblings, - sizeof(*siblings), - GFP_KERNEL); - if (!siblings) - return -ENOMEM; - } - - for (n = 0; n < num_siblings; n++) { - struct i915_engine_class_instance ci; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { - err = -EFAULT; - goto out_siblings; - } - - siblings[n] = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!siblings[n]) { - drm_dbg(&i915->drm, - "Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, ci.engine_class, ci.engine_instance); - err = -EINVAL; - goto out_siblings; - } - } - - ce = intel_execlists_create_virtual(siblings, n); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_siblings; - } - - intel_context_set_gem(ce, set->ctx, null_sseu); - - if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { - intel_context_put(ce); - err = -EEXIST; - goto out_siblings; - } - -out_siblings: - if (siblings != stack) - 
kfree(siblings); - - return err; -} - -static int -set_engines__bond(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_bond __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct i915_engine_class_instance ci; - struct intel_engine_cs *virtual; - struct intel_engine_cs *master; - u16 idx, num_bonds; - int err, n; - - if (get_user(idx, &ext->virtual_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, - "Invalid index for virtual engine: %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (!set->engines->engines[idx]) { - drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); - return -EINVAL; - } - virtual = set->engines->engines[idx]->engine; - - if (intel_engine_is_virtual(virtual)) { - drm_dbg(&i915->drm, - "Bonding with virtual engines not allowed\n"); - return -EINVAL; - } - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { - err = check_user_mbz(&ext->mbz64[n]); - if (err) - return err; - } - - if (copy_from_user(&ci, &ext->master, sizeof(ci))) - return -EFAULT; - - master = intel_engine_lookup_user(i915, - ci.engine_class, ci.engine_instance); - if (!master) { - drm_dbg(&i915->drm, - "Unrecognised master engine: { class:%u, instance:%u }\n", - ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - if (get_user(num_bonds, &ext->num_bonds)) - return -EFAULT; - - for (n = 0; n < num_bonds; n++) { - struct intel_engine_cs *bond; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) - return -EFAULT; - - bond = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!bond) { - drm_dbg(&i915->drm, - "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); - return -EINVAL; - } - } - - return 0; -} - -static const i915_user_extension_fn set_engines__extensions[] = { - [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, - [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, -}; - -static int -set_engines(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - struct drm_i915_private *i915 = ctx->i915; - struct i915_context_param_engines __user *user = - u64_to_user_ptr(args->value); - struct intel_sseu null_sseu = {}; - struct set_engines set = { .ctx = ctx }; - unsigned int num_engines, n; - u64 extensions; - int err; - - if (!args->size) { /* switch back to legacy user_ring_map */ - if (!i915_gem_context_user_engines(ctx)) - return 0; - - set.engines = default_engines(ctx, null_sseu); - if (IS_ERR(set.engines)) - return PTR_ERR(set.engines); - - goto replace; - } - - if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) { - drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", - args->size); - return -EINVAL; - } - - num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - /* RING_MASK has no shift so we can use it directly here */ - if (num_engines > I915_EXEC_RING_MASK + 1) - return -EINVAL; - - set.engines = alloc_engines(num_engines); - if (!set.engines) - return -ENOMEM; - - for (n = 0; n < num_engines; n++) { - struct i915_engine_class_instance ci; - struct intel_engine_cs *engine; - struct intel_context *ce; - - if (copy_from_user(&ci, &user->engines[n], 
sizeof(ci))) { - __free_engines(set.engines, n); - return -EFAULT; - } - - if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && - ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { - set.engines->engines[n] = NULL; - continue; - } - - engine = intel_engine_lookup_user(ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!engine) { - drm_dbg(&i915->drm, - "Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); - __free_engines(set.engines, n); - return -ENOENT; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - __free_engines(set.engines, n); - return PTR_ERR(ce); - } - - intel_context_set_gem(ce, ctx, null_sseu); - - set.engines->engines[n] = ce; - } - set.engines->num_engines = num_engines; - - err = -EFAULT; - if (!get_user(extensions, &user->extensions)) - err = i915_user_extensions(u64_to_user_ptr(extensions), - set_engines__extensions, - ARRAY_SIZE(set_engines__extensions), - &set); - if (err) { - free_engines(set.engines); - return err; - } - -replace: - mutex_lock(&ctx->engines_mutex); - if (i915_gem_context_is_closed(ctx)) { - mutex_unlock(&ctx->engines_mutex); - free_engines(set.engines); - return -ENOENT; - } - if (args->size) - i915_gem_context_set_user_engines(ctx); - else - i915_gem_context_clear_user_engines(ctx); - set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); - mutex_unlock(&ctx->engines_mutex); - - /* Keep track of old engine sets for kill_context() */ - engines_idle_release(ctx, set.engines); - - return 0; -} - static int set_persistence(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) @@ -2200,10 +1901,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_sseu(ctx, args); break; - case I915_CONTEXT_PARAM_ENGINES: - ret = set_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: ret = set_persistence(ctx, args); break; @@ -2212,6 +1909,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: case I915_CONTEXT_PARAM_VM: + case I915_CONTEXT_PARAM_ENGINES: default: ret = -EINVAL; break; -- cgit v1.2.3 From 5888d588597408ea8208e89497dbbdb72d405071 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:32 -0500 Subject: drm/i915/selftests: Take a VM in kernel_context() This better models where we want to go with contexts in general where things like the VM and engine set are create parameters instead of being set after the fact. 
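By way of example (not taken from the patch; error handling and reference management are trimmed, and the i915_ppgtt_create() call is an assumption about the selftest environment), a selftest that wants two contexts to share one address space can now express that directly at creation:

    struct i915_ppgtt *ppgtt = i915_ppgtt_create(&i915->gt);
    struct i915_gem_context *ctx_a, *ctx_b;

    /* Both contexts are bound to the same VM from the start, rather than
     * having a VM swapped in after the fact. */
    ctx_a = kernel_context(i915, &ppgtt->vm);
    ctx_b = kernel_context(i915, &ppgtt->vm);
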
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-28-jason@jlekstrand.net --- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/gem/selftests/mock_context.c | 9 ++++++++- drivers/gpu/drm/i915/gem/selftests/mock_context.h | 4 +++- drivers/gpu/drm/i915/gt/selftest_execlists.c | 20 ++++++++++---------- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- 5 files changed, 24 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 92544a174cc9..3e59746afdc8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -680,7 +680,7 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, NULL); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; @@ -813,7 +813,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, NULL); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 61aaac4a334c..500ef27ba477 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -150,7 +150,8 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) } struct i915_gem_context * -kernel_context(struct drm_i915_private *i915) +kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm) { struct i915_gem_context *ctx; struct i915_gem_proto_context *pc; @@ -159,6 +160,12 @@ kernel_context(struct drm_i915_private *i915) if (IS_ERR(pc)) return ERR_CAST(pc); + if (vm) { + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = i915_vm_get(vm); + } + ctx = i915_gem_create_context(i915, pc); proto_context_close(pc); if (IS_ERR(ctx)) diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 2a6121d33352..7a02fd9b5866 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -10,6 +10,7 @@ struct file; struct drm_i915_private; struct intel_engine_cs; +struct i915_address_space; void mock_init_contexts(struct drm_i915_private *i915); @@ -25,7 +26,8 @@ live_context(struct drm_i915_private *i915, struct file *file); struct i915_gem_context * live_context_for_engine(struct intel_engine_cs *engine, struct file *file); -struct i915_gem_context *kernel_context(struct drm_i915_private *i915); +struct i915_gem_context *kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm); void kernel_context_close(struct i915_gem_context *ctx); #endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 6abce18d0ef3..73ddc6e14730 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -1539,12 +1539,12 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. 
*/ - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) return -ENOMEM; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -1741,12 +1741,12 @@ static int live_preempt(void *arg) if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -1833,11 +1833,11 @@ static int live_late_preempt(void *arg) if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; @@ -1927,7 +1927,7 @@ struct preempt_client { static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(gt->i915); + c->ctx = kernel_context(gt->i915, NULL); if (!c->ctx) return -ENOMEM; @@ -3384,12 +3384,12 @@ static int live_preempt_timeout(void *arg) if (igt_spinner_init(&spin_lo, gt)) return -ENOMEM; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -3676,7 +3676,7 @@ static int live_preempt_smoke(void *arg) } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.gt->i915); + smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL); if (!smoke.contexts[n]) goto err_ctx; } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 6a0b04bdac58..7aea10aa1fb4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -42,7 +42,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; - h->ctx = kernel_context(gt->i915); + h->ctx = kernel_context(gt->i915, NULL); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); -- cgit v1.2.3 From f92906e220f1f130995a67817cfec7f305a55bfc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:33 -0500 Subject: i915/gem/selftests: Assign the VM at context creation in igt_shared_ctx_exec We want to delete __assign_ppgtt and, generally, stop setting the VM after context creation. This is the one place I could find in the selftests where we set a VM after the fact. 
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-29-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3e59746afdc8..8eb5050f8cb3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -813,16 +813,12 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915, NULL); + ctx = kernel_context(i915, ctx_vm(parent)); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; } - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, ctx_vm(parent)); - mutex_unlock(&ctx->mutex); - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); -- cgit v1.2.3 From 0eee9977f9d3d8f1e40175dada55b3d00121ac79 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:34 -0500 Subject: drm/i915/gem: Roll all of context creation together Now that we have the whole engine set and VM at context creation time, we can just assign those fields instead of creating first and handling the VM and engines later. This lets us avoid creating useless VMs and engine sets and lets us get rid of the complex VM setting code. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-30-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 176 ++++++++-------------- drivers/gpu/drm/i915/gem/selftests/mock_context.c | 33 ++-- 2 files changed, 73 insertions(+), 136 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5f5375b15c53..c67e305f5bc7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1279,56 +1279,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static struct i915_gem_context * -__create_context(struct drm_i915_private *i915, - const struct i915_gem_proto_context *pc) -{ - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - int err; - int i; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); - - kref_init(&ctx->ref); - ctx->i915 = i915; - ctx->sched = pc->sched; - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->link); - - spin_lock_init(&ctx->stale.lock); - INIT_LIST_HEAD(&ctx->stale.engines); - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx, pc->legacy_rcs_sseu); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto err_free; - } - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - - /* NB: Mark all slices as needing a remap so that when the context first - * loads it will restore whatever remap state already exists. If there - * is no remap info, it will be a NOP. 
*/ - ctx->remap_slice = ALL_L3_SLICES(i915); - - ctx->user_flags = pc->user_flags; - - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) - ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - - return ctx; - -err_free: - kfree(ctx); - return ERR_PTR(err); -} - static inline struct i915_gem_engines * __context_engines_await(const struct i915_gem_context *ctx, bool *user_engines) @@ -1372,54 +1322,31 @@ context_apply_all(struct i915_gem_context *ctx, i915_sw_fence_complete(&e->fence); } -static void __apply_ppgtt(struct intel_context *ce, void *vm) -{ - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(vm); -} - -static struct i915_address_space * -__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) -{ - struct i915_address_space *old; - - old = rcu_replace_pointer(ctx->vm, - i915_vm_open(vm), - lockdep_is_held(&ctx->mutex)); - GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - - context_apply_all(ctx, __apply_ppgtt, vm); - - return old; -} - -static void __assign_ppgtt(struct i915_gem_context *ctx, - struct i915_address_space *vm) -{ - if (vm == rcu_access_pointer(ctx->vm)) - return; - - vm = __set_ppgtt(ctx, vm); - if (vm) - i915_vm_close(vm); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; - int ret; + struct i915_address_space *vm = NULL; + struct i915_gem_engines *e; + int err; + int i; - ctx = __create_context(i915, pc); - if (IS_ERR(ctx)) - return ctx; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + kref_init(&ctx->ref); + ctx->i915 = i915; + ctx->sched = pc->sched; + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); + + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); if (pc->vm) { - /* __assign_ppgtt() requires this mutex to be held */ - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, pc->vm); - mutex_unlock(&ctx->mutex); + vm = i915_vm_get(pc->vm); } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; @@ -1427,50 +1354,65 @@ i915_gem_create_context(struct drm_i915_private *i915, if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - context_close(ctx); - return ERR_CAST(ppgtt); + err = PTR_ERR(ppgtt); + goto err_ctx; } + vm = &ppgtt->vm; + } + if (vm) { + RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); - /* __assign_ppgtt() requires this mutex to be held */ - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); - - /* __assign_ppgtt() takes another reference for us */ - i915_vm_put(&ppgtt->vm); + /* i915_vm_open() takes a reference */ + i915_vm_put(vm); } + mutex_init(&ctx->engines_mutex); if (pc->num_user_engines >= 0) { - struct i915_gem_engines *engines; + i915_gem_context_set_user_engines(ctx); + e = user_engines(ctx, pc->num_user_engines, pc->user_engines); + } else { + i915_gem_context_clear_user_engines(ctx); + e = default_engines(ctx, pc->legacy_rcs_sseu); + } + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_vm; + } + RCU_INIT_POINTER(ctx->engines, e); - engines = user_engines(ctx, pc->num_user_engines, - pc->user_engines); - if (IS_ERR(engines)) { - context_close(ctx); - return ERR_CAST(engines); - } + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); - mutex_lock(&ctx->engines_mutex); - i915_gem_context_set_user_engines(ctx); - engines = rcu_replace_pointer(ctx->engines, engines, 1); - mutex_unlock(&ctx->engines_mutex); + /* NB: Mark all slices 
as needing a remap so that when the context first + * loads it will restore whatever remap state already exists. If there + * is no remap info, it will be a NOP. */ + ctx->remap_slice = ALL_L3_SLICES(i915); - free_engines(engines); - } + ctx->user_flags = pc->user_flags; + + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) + ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; if (pc->single_timeline) { - ret = drm_syncobj_create(&ctx->syncobj, + err = drm_syncobj_create(&ctx->syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, NULL); - if (ret) { - context_close(ctx); - return ERR_PTR(ret); - } + if (err) + goto err_engines; } trace_i915_context_create(ctx); return ctx; + +err_engines: + free_engines(e); +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); +err_ctx: + kfree(ctx); + return ERR_PTR(err); } static void init_contexts(struct i915_gem_contexts *gc) diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 500ef27ba477..fee070df1c97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -31,15 +31,6 @@ mock_context(struct drm_i915_private *i915, i915_gem_context_set_persistence(ctx); - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx, null_sseu); - if (IS_ERR(e)) - goto err_free; - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - if (name) { struct i915_ppgtt *ppgtt; @@ -47,25 +38,29 @@ mock_context(struct drm_i915_private *i915, ppgtt = mock_ppgtt(i915, name); if (!ppgtt) - goto err_put; - - mutex_lock(&ctx->mutex); - __set_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + goto err_free; + ctx->vm = i915_vm_open(&ppgtt->vm); i915_vm_put(&ppgtt->vm); } + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx, null_sseu); + if (IS_ERR(e)) + goto err_vm; + RCU_INIT_POINTER(ctx->engines, e); + + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); + return ctx; +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); err_free: kfree(ctx); return NULL; - -err_put: - i915_gem_context_set_closed(ctx); - i915_gem_context_put(ctx); - return NULL; } void mock_context_close(struct i915_gem_context *ctx) -- cgit v1.2.3 From ca06f93638362bf83584cdf33897822bf1578cf9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:35 -0500 Subject: drm/i915: Finalize contexts in GEM_CONTEXT_CREATE on version 13+ All the proto-context stuff for context creation exists to allow older userspace drivers to set VMs and engine sets via SET_CONTEXT_PARAM. Drivers need to update to use CONTEXT_CREATE_EXT_* for this going forward. Force the issue by blocking the old mechanism on any future hardware generations. 
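In outline (a simplified paraphrase of the hunk that follows, not a literal quotation; variable names shortened), the create ioctl now branches on the graphics version:

    if (GRAPHICS_VER(i915) > 12) {
            /* Reserve the ID and build the final context right away;
             * no proto-context survives the ioctl. */
            ret = xa_alloc(&fpriv->context_xa, &id, NULL,
                           xa_limit_32b, GFP_KERNEL);
            ctx = i915_gem_create_context(i915, pc);
            proto_context_close(pc);
            gem_context_register(ctx, fpriv, id);
    } else {
            /* Older userspace may still configure the context through
             * SET_CONTEXT_PARAM, so keep the proto-context around until
             * its first real use. */
            ret = proto_context_register(fpriv, pc, &id);
    }
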
Signed-off-by: Jason Ekstrand Cc: Jon Bloomfield Cc: Carl Zhang Cc: Michal Mrozek Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-31-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 39 ++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c67e305f5bc7..7d6f52d8a801 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1996,9 +1996,28 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, goto err_pc; } - ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); - if (ret < 0) - goto err_pc; + if (GRAPHICS_VER(i915) > 12) { + struct i915_gem_context *ctx; + + /* Get ourselves a context ID */ + ret = xa_alloc(&ext_data.fpriv->context_xa, &id, NULL, + xa_limit_32b, GFP_KERNEL); + if (ret) + goto err_pc; + + ctx = i915_gem_create_context(i915, ext_data.pc); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto err_pc; + } + + proto_context_close(ext_data.pc); + gem_context_register(ctx, ext_data.fpriv, id); + } else { + ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); + if (ret < 0) + goto err_pc; + } args->ctx_id = id; drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); @@ -2181,15 +2200,17 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, mutex_lock(&file_priv->proto_context_lock); ctx = __context_lookup(file_priv, args->ctx_id); if (!ctx) { - /* FIXME: We should consider disallowing SET_CONTEXT_PARAM - * for most things on future platforms. Clients should be - * using CONTEXT_CREATE_EXT_PARAM instead. - */ pc = xa_load(&file_priv->proto_context_xa, args->ctx_id); - if (pc) + if (pc) { + /* Contexts should be finalized inside + * GEM_CONTEXT_CREATE starting with graphics + * version 13. + */ + WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12); ret = set_proto_ctx_param(file_priv, pc, args); - else + } else { ret = -ENOENT; + } } mutex_unlock(&file_priv->proto_context_lock); -- cgit v1.2.3 From b3f450d9e1f386cda3e15f76efcb1b763d9a2856 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Jul 2021 14:53:06 +0100 Subject: drm/i915: use consistent CPU mappings for pin_map users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For discrete, users of pin_map() needs to obey the same rules at the TTM backend, where we map system only objects as WB, and everything else as WC. The simplest for now is to just force the correct mapping type as per the new rules for discrete. 
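Restating the rule in code form (an illustrative helper only, not something the patch adds; the actual hunk below additionally rejects explicit non-WC mapping requests for objects created without a placement list):

    /* Mapping type used by pin_map() on discrete, mirroring the TTM
     * backend: anything that can be placed in device local memory is
     * mapped write-combined, system-only objects are mapped write-back. */
    static enum i915_map_type discrete_map_type(struct drm_i915_gem_object *obj)
    {
            if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL))
                    return I915_MAP_WC;

            return I915_MAP_WB;
    }
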
Suggested-by: Thomas Hellström Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210705135310.1502437-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 34 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 4 ++++ drivers/gpu/drm/i915/gem/i915_gem_pages.c | 31 +++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 547cc9dad90d..9da7b288b7ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -625,6 +625,40 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj, return obj->ops->migrate(obj, mr); } +/** + * i915_gem_object_placement_possible - Check whether the object can be + * placed at certain memory type + * @obj: Pointer to the object + * @type: The memory type to check + * + * Return: True if the object can be placed in @type. False otherwise. + */ +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type) +{ + unsigned int i; + + if (!obj->mm.n_placements) { + switch (type) { + case INTEL_MEMORY_LOCAL: + return i915_gem_object_has_iomem(obj); + case INTEL_MEMORY_SYSTEM: + return i915_gem_object_has_pages(obj); + default: + /* Ignore stolen for now */ + GEM_BUG_ON(1); + return false; + } + } + + for (i = 0; i < obj->mm.n_placements; i++) { + if (obj->mm.placements[i]->type == type) + return true; + } + + return false; +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index d423d8cac4f2..8be4fadeee48 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -12,6 +12,7 @@ #include #include "display/intel_frontbuffer.h" +#include "intel_memory_region.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" #include "i915_gem_ww.h" @@ -607,6 +608,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, unsigned int flags); +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index f2f850e31b8e..0c9d28423d45 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, dma_addr_t addr; void *vaddr; - if (type != I915_MAP_WC) - return ERR_PTR(-ENODEV); + GEM_BUG_ON(type != I915_MAP_WC); if (n_pfn > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ @@ -374,6 +373,34 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + /* + * For discrete our CPU mappings needs to be consistent in order to + * function correctly on !x86. When mapping things through TTM, we use + * the same rules to determine the caching type. 
+ * + * The caching rules, starting from DG1: + * + * - If the object can be placed in device local-memory, then the + * pages should be allocated and mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, + * with the guarantee that everything is also coherent with the + * GPU. + * + * Internal users of lmem are already expected to get this right, so no + * fudging needed there. + */ + if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) { + if (type != I915_MAP_WC && !obj->mm.n_placements) { + ptr = ERR_PTR(-ENODEV); + goto err_unpin; + } + + type = I915_MAP_WC; + } else if (IS_DGFX(to_i915(obj->base.dev))) { + type = I915_MAP_WB; + } + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { -- cgit v1.2.3 From 78d2ad7eb4e1f0e9cd5d79788446b6092c21d3e0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 30 Jun 2021 19:44:13 +0300 Subject: drm/i915/gt: Fix -EDEADLK handling regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion to ww mutexes failed to address the fence code which already returns -EDEADLK when we run out of fences. Ww mutexes on the other hand treat -EDEADLK as an internal errno value indicating a need to restart the operation due to a deadlock. So now when the fence code returns -EDEADLK the higher level code erroneously restarts everything instead of returning the error to userspace as is expected. To remedy this let's switch the fence code to use a different errno value for this. -ENOBUFS seems like a semi-reasonable unique choice. Apart from igt the only user of this I could find is sna, and even there all we do is dump the current fence registers from debugfs into the X server log. So no user visible functionality is affected. If we really cared about preserving this we could of course convert back to -EDEADLK higher up, but doesn't seem like that's worth the hassle here. Not quite sure which commit specifically broke this, but I'll just attribute it to the general gem ww mutex work. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Thomas Hellström Testcase: igt/gem_pread/exhaustion Testcase: igt/gem_pwrite/basic-exhaustion Testcase: igt/gem_fenced_exec_thrash/too-many-fences Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210630164413.25481-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index cac7f3f44642..f8948de72036 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -348,7 +348,7 @@ static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) if (intel_has_pending_fb_unpin(ggtt->vm.i915)) return ERR_PTR(-EAGAIN); - return ERR_PTR(-EDEADLK); + return ERR_PTR(-ENOBUFS); } int __i915_vma_pin_fence(struct i915_vma *vma) -- cgit v1.2.3 From c492405860f78263e3a359c0a88385957a2729e9 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 12 Jul 2021 17:38:50 -0700 Subject: drm/i915: Settle on "adl-x" in WA comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the places are using this format so lets consolidate it. 
v2: - split patch in two: display and non-display because of conflicts between drm-intel-gt-next x drm-intel-next Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210713003854.143197-2-jose.souza@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d9a5a445ceec..e5e3f820074a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1078,7 +1078,7 @@ gen12_gt_workarounds_init(struct drm_i915_private *i915, { icl_wa_init_mcr(i915, wal); - /* Wa_14011060649:tgl,rkl,dg1,adls,adl-p */ + /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */ wa_14011060649(i915, wal); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ef8d9b2284b3..37d6791e04c9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7351,7 +7351,7 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) { - /* Wa_1409120013:tgl,rkl,adl_s,dg1 */ + /* Wa_1409120013:tgl,rkl,adl-s,dg1 */ if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, @@ -7362,7 +7362,7 @@ static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); - /* Wa_14011059788:tgl,rkl,adl_s,dg1,adl-p */ + /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, 0, DFR_DISABLE); -- cgit v1.2.3 From 28ec02c9cbebf3feeaf21a59df9dfbc02bda3362 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 12 Jul 2021 17:38:51 -0700 Subject: drm/i915: Implement Wa_1508744258 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same bit was required for Wa_14012131227 in DG1 now it is also required as Wa_1508744258 to TGL, RKL, DG1, ADL-S and ADL-P. 
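For background, GEN7_COMMON_SLICE_CHICKEN1 is a masked register: bits 31:16 of each write select which of bits 15:0 actually latch, so no read-modify-write is needed. That is what wa_masked_en() hides; the underlying idiom is roughly (macros spelled as in i915_reg.h, quoted from memory):

    #define _MASKED_BIT_ENABLE(a)   (((a) << 16) | (a))
    #define _MASKED_BIT_DISABLE(a)  ((a) << 16)

    /* Set GEN9_RHWO_OPTIMIZATION_DISABLE without disturbing other bits: */
    intel_uncore_write(uncore, GEN7_COMMON_SLICE_CHICKEN1,
                       _MASKED_BIT_ENABLE(GEN9_RHWO_OPTIMIZATION_DISABLE));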
Cc: Gwan-gyeong Mun Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210713003854.143197-3-jose.souza@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e5e3f820074a..c346229e2be0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -670,6 +670,13 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0); + + /* + * Wa_14012131227:dg1 + * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p + */ + wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, -- cgit v1.2.3 From 5d4ed4f8b5efd347d761ee98c3a6e4e42e23f67e Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 12 Jul 2021 17:38:52 -0700 Subject: drm/i915/adl_s: Extend Wa_1406941453 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BSpec: 54370 Cc: Gwan-gyeong Mun Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210713003854.143197-4-jose.souza@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c346229e2be0..72562c233ad2 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1677,8 +1677,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1406941453:tgl,rkl,dg1 */ + if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || + IS_ALDERLAKE_S(i915)) { + /* Wa_1406941453:tgl,rkl,dg1,adl-s */ wa_masked_en(wal, GEN10_SAMPLER_MODE, ENABLE_SMALLPL); -- cgit v1.2.3 From 1ccf7294b76d28d5151f024351c747ccf101d66e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 8 Jul 2021 09:20:49 -0700 Subject: drm/i915/guc: Relax CTB response timeout In upcoming patch we will allow more CTB requests to be sent in parallel to the GuC for processing, so we shouldn't assume any more that GuC will always reply without 10ms. Use bigger value hardcoded value of 1s instead. 
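The two timeouts implement the usual two-phase wait: busy-poll briefly for the common fast case, then fall back to a sleeping poll for up to the long timeout. Stripped of the i915 wait_for_us()/wait_for() macros, the shape is roughly (a generic sketch, not the macro expansion):

    /* Busy-poll for up to fast_us, then sleep-poll until slow_ms expires. */
    static int wait_two_phase(bool (*done)(void *), void *arg,
                              unsigned int fast_us, unsigned int slow_ms)
    {
            ktime_t end = ktime_add_us(ktime_get(), fast_us);

            do {
                    if (done(arg))
                            return 0;
                    cpu_relax();
            } while (ktime_before(ktime_get(), end));

            end = ktime_add_ms(ktime_get(), slow_ms);
            do {
                    if (done(arg))
                            return 0;
                    usleep_range(500, 1000);
            } while (ktime_before(ktime_get(), end));

            return -ETIMEDOUT;
    }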
v2: Add CONFIG_DRM_I915_GUC_CTB_TIMEOUT config option v3: (Daniel Vetter) - Use hardcoded value of 1s rather than config option v4: (Michal) - Use defines for timeout values Signed-off-by: Matthew Brost Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 43409044528e..b86575b99537 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -474,14 +474,18 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) /* * Fast commands should complete in less than 10us, so sample quickly * up to that length of time, then switch to a slower sleep-wait loop. - * No GuC command should ever take longer than 10ms. + * No GuC command should ever take longer than 10ms but many GuC + * commands can be inflight at time, so use a 1s timeout on the slower + * sleep-wait loop. */ +#define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10 +#define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000 #define done \ (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ GUC_HXG_ORIGIN_GUC) - err = wait_for_us(done, 10); + err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS); if (err) - err = wait_for(done, 10); + err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done if (unlikely(err)) -- cgit v1.2.3 From dd9c0f3cbbe6fdfe7402b9c6ea35f04b260901bf Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 8 Jul 2021 09:20:50 -0700 Subject: drm/i915/guc: Improve error message for unsolicited CT response Improve the error message when a unsolicited CT response is received by printing fence that couldn't be found, the last fence, and all requests with a response outstanding. Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index b86575b99537..80db59b45c45 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -732,12 +732,16 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r found = true; break; } - spin_unlock_irqrestore(&ct->requests.lock, flags); - if (!found) { CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence); - return -ENOKEY; + CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", fence, + ct->requests.last_fence); + list_for_each_entry(req, &ct->requests.pending, link) + CT_ERROR(ct, "request %u awaits response\n", + req->fence); + err = -ENOKEY; } + spin_unlock_irqrestore(&ct->requests.lock, flags); if (unlikely(err)) return err; -- cgit v1.2.3 From c26e289f1d8d5b8716f825ac5d798897aca5a124 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 8 Jul 2021 09:20:51 -0700 Subject: drm/i915/guc: Increase size of CTB buffers With the introduction of non-blocking CTBs more than one CTB can be in flight at a time. 
Increasing the size of the CTBs should reduce how often software hits the case where no space is available in the CTB buffer. Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 80db59b45c45..43e03aa2dde8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -58,11 +58,16 @@ static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct) * +--------+-----------------------------------------------+------+ * * Size of each `CT Buffer`_ must be multiple of 4K. - * As we don't expect too many messages, for now use minimum sizes. + * We don't expect too many messages in flight at any time, unless we are + * using the GuC submission. In that case each request requires a minimum + * 2 dwords which gives us a maximum 256 queue'd requests. Hopefully this + * enough space to avoid backpressure on the driver. We increase the size + * of the receive buffer (relative to the send) to ensure a G2H response + * CTB has a landing spot. */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) struct ct_request { struct list_head link; @@ -643,7 +648,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* beware of buffer wrap case */ if (unlikely(available < 0)) available += size; - CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail); + CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail, size); GEM_BUG_ON(available < 0); header = cmds[head]; -- cgit v1.2.3 From 1681924d8bdeb248451fd1d47c18648ffaeed625 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 8 Jul 2021 09:20:52 -0700 Subject: drm/i915/guc: Add non blocking CTB send function Add non blocking CTB send function, intel_guc_send_nb. GuC submission will send CTBs in the critical path and does not need to wait for these CTBs to complete before moving on, hence the need for this new function. The non-blocking CTB now must have a flow control mechanism to ensure the buffer isn't overrun. A lazy spin wait is used as we believe the flow control condition should be rare with a properly sized buffer. The function, intel_guc_send_nb, is exported in this patch but unused. Several patches later in the series make use of this function. 
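As a purely hypothetical caller (the real users arrive later in the series), the expectation is that -EBUSY from the non-blocking path is treated as back-pressure to be retried, not as a hard failure:

    /* Hypothetical caller: queue an H2G action without blocking.  -EBUSY
     * means the CTB is full and the action should be retried later
     * (e.g. from a tasklet), not treated as fatal.
     */
    static int send_action_nb(struct intel_guc *guc, const u32 *action, u32 len)
    {
            int err = intel_guc_send_nb(guc, action, len);

            if (err == -EBUSY)
                    return -EAGAIN; /* caller re-queues the work */

            return err;
    }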
v2: (Michal) - Use define for H2G room calculations - Move INTEL_GUC_SEND_NB define (Daniel Vetter) - Use msleep_interruptible rather than cond_resched v3: (Michal) - Move includes to following patch - s/INTEL_GUC_SEND_NB/INTEL_GUC_CT_SEND_NB/g v4: (John H) - Update comment, add type local variable Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-5-matthew.brost@intel.com --- .../drm/i915/gt/uc/abi/guc_communication_ctb_abi.h | 3 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 11 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88 +++++++++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 +- 4 files changed, 91 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index e933ca02d0eb..99e1fad5ca20 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -79,7 +79,8 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); * +---+-------+--------------------------------------------------------------+ */ -#define GUC_CTB_MSG_MIN_LEN 1u +#define GUC_CTB_HDR_LEN 1u +#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN #define GUC_CTB_MSG_MAX_LEN 256u #define GUC_CTB_MSG_0_FENCE (0xffff << 16) #define GUC_CTB_MSG_0_FORMAT (0xf << 12) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4abc59f6f3cd..72e4653222e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -74,7 +74,14 @@ static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); +} + +static +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len) +{ + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, + INTEL_GUC_CT_SEND_NB); } static inline int @@ -82,7 +89,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size) { return intel_guc_ct_send(&guc->ct, action, len, - response_buf, response_buf_size); + response_buf, response_buf_size, 0); } static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 43e03aa2dde8..3d6cba8d91ad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -3,6 +3,8 @@ * Copyright © 2016-2019 Intel Corporation */ +#include + #include "i915_drv.h" #include "intel_guc_ct.h" #include "gt/intel_gt.h" @@ -373,7 +375,7 @@ static void write_barrier(struct intel_guc_ct *ct) static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, - u32 fence) + u32 fence, u32 flags) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct guc_ct_buffer_desc *desc = ctb->desc; @@ -383,6 +385,7 @@ static int ct_write(struct intel_guc_ct *ct, u32 used; u32 header; u32 hxg; + u32 type; u32 *cmds = ctb->cmds; unsigned int i; @@ -408,8 +411,8 @@ static int ct_write(struct intel_guc_ct *ct, else used = tail - head; - /* make sure there is a space including extra dw for the fence */ - if 
(unlikely(used + len + 1 >= size)) + /* make sure there is a space including extra dw for the header */ + if (unlikely(used + len + GUC_CTB_HDR_LEN >= size)) return -ENOSPC; /* @@ -421,9 +424,11 @@ static int ct_write(struct intel_guc_ct *ct, FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); - hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION | - GUC_HXG_REQUEST_MSG_0_DATA0, action[0]); + type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT : + GUC_HXG_TYPE_REQUEST; + hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]); @@ -500,6 +505,48 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } +static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = READ_ONCE(desc->head); + u32 space; + + space = CIRC_SPACE(desc->tail, head, ctb->size); + + return space >= len_dw; +} + +static int ct_send_nb(struct intel_guc_ct *ct, + const u32 *action, + u32 len, + u32 flags) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; + unsigned long spin_flags; + u32 fence; + int ret; + + spin_lock_irqsave(&ctb->lock, spin_flags); + + ret = h2g_has_room(ctb, len + GUC_CTB_HDR_LEN); + if (unlikely(!ret)) { + ret = -EBUSY; + goto out; + } + + fence = ct_get_next_fence(ct); + ret = ct_write(ct, action, len, fence, flags); + if (unlikely(ret)) + goto out; + + intel_guc_notify(ct_to_guc(ct)); + +out: + spin_unlock_irqrestore(&ctb->lock, spin_flags); + + return ret; +} + static int ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, @@ -507,8 +554,10 @@ static int ct_send(struct intel_guc_ct *ct, u32 response_buf_size, u32 *status) { + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct ct_request request; unsigned long flags; + unsigned int sleep_period_ms = 1; u32 fence; int err; @@ -516,8 +565,24 @@ static int ct_send(struct intel_guc_ct *ct, GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); + might_sleep(); + + /* + * We use a lazy spin wait loop here as we believe that if the CT + * buffers are sized correctly the flow control condition should be + * rare. + */ +retry: + spin_lock_irqsave(&ctb->lock, flags); + if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) { + spin_unlock_irqrestore(&ctb->lock, flags); - spin_lock_irqsave(&ct->ctbs.send.lock, flags); + if (msleep_interruptible(sleep_period_ms)) + return -EINTR; + sleep_period_ms = sleep_period_ms << 1; + + goto retry; + } fence = ct_get_next_fence(ct); request.fence = fence; @@ -529,9 +594,9 @@ static int ct_send(struct intel_guc_ct *ct, list_add_tail(&request.link, &ct->requests.pending); spin_unlock(&ct->requests.lock); - err = ct_write(ct, action, len, fence); + err = ct_write(ct, action, len, fence, 0); - spin_unlock_irqrestore(&ct->ctbs.send.lock, flags); + spin_unlock_irqrestore(&ctb->lock, flags); if (unlikely(err)) goto unlink; @@ -571,7 +636,7 @@ unlink: * Command Transport (CT) buffer based GuC send function. 
*/ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) + u32 *response_buf, u32 response_buf_size, u32 flags) { u32 status = ~0; /* undefined */ int ret; @@ -581,6 +646,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, return -ENODEV; } + if (flags & INTEL_GUC_CT_SEND_NB) + return ct_send_nb(ct, action, len, flags); + ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 1ae2dde6db93..5bb8bef024c8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -42,7 +42,6 @@ struct intel_guc_ct_buffer { bool broken; }; - /** Top-level structure for Command Transport related data * * Includes a pair of CT buffers for bi-directional communication and tracking @@ -87,8 +86,9 @@ static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) return ct->enabled; } +#define INTEL_GUC_CT_SEND_NB BIT(31) int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size); + u32 *response_buf, u32 response_buf_size, u32 flags); void intel_guc_ct_event_handler(struct intel_guc_ct *ct); #endif /* _INTEL_GUC_CT_H_ */ -- cgit v1.2.3 From b43b9950486eb9b229493fc91cdabbbb4d07cfbc Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 8 Jul 2021 09:20:53 -0700 Subject: drm/i915/guc: Add stall timer to non blocking CTB send function Implement a stall timer which fails H2G CTBs once a period of time with no forward progress is reached to prevent deadlock. v2: (Michal) - Improve error message in ct_deadlock() - Set broken when ct_deadlock() returns true - Return -EPIPE on ct_deadlock() v3: (Michal) - Add ms to stall timer comment (Matthew) - Move broken check to intel_guc_ct_send() Signed-off-by: John Harrison Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 62 +++++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 ++ 2 files changed, 59 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 3d6cba8d91ad..db3e85b89573 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -4,6 +4,9 @@ */ #include +#include +#include +#include #include "i915_drv.h" #include "intel_guc_ct.h" @@ -316,6 +319,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) goto err_deregister; ct->enabled = true; + ct->stall_time = KTIME_MAX; return 0; @@ -389,9 +393,6 @@ static int ct_write(struct intel_guc_ct *ct, u32 *cmds = ctb->cmds; unsigned int i; - if (unlikely(ctb->broken)) - return -EPIPE; - if (unlikely(desc->status)) goto corrupted; @@ -505,6 +506,25 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } +#define GUC_CTB_TIMEOUT_MS 1500 +static inline bool ct_deadlocked(struct intel_guc_ct *ct) +{ + long timeout = GUC_CTB_TIMEOUT_MS; + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; + + if (unlikely(ret)) { + struct guc_ct_buffer_desc *send = ct->ctbs.send.desc; + struct 
guc_ct_buffer_desc *recv = ct->ctbs.send.desc; + + CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n", + ktime_ms_delta(ktime_get(), ct->stall_time), + send->status, recv->status); + ct->ctbs.send.broken = true; + } + + return ret; +} + static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) { struct guc_ct_buffer_desc *desc = ctb->desc; @@ -516,6 +536,26 @@ static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) return space >= len_dw; } +static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; + + lockdep_assert_held(&ct->ctbs.send.lock); + + if (unlikely(!h2g_has_room(ctb, len_dw))) { + if (ct->stall_time == KTIME_MAX) + ct->stall_time = ktime_get(); + + if (unlikely(ct_deadlocked(ct))) + return -EPIPE; + else + return -EBUSY; + } + + ct->stall_time = KTIME_MAX; + return 0; +} + static int ct_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len, @@ -528,11 +568,9 @@ static int ct_send_nb(struct intel_guc_ct *ct, spin_lock_irqsave(&ctb->lock, spin_flags); - ret = h2g_has_room(ctb, len + GUC_CTB_HDR_LEN); - if (unlikely(!ret)) { - ret = -EBUSY; + ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN); + if (unlikely(ret)) goto out; - } fence = ct_get_next_fence(ct); ret = ct_write(ct, action, len, fence, flags); @@ -575,8 +613,13 @@ static int ct_send(struct intel_guc_ct *ct, retry: spin_lock_irqsave(&ctb->lock, flags); if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) { + if (ct->stall_time == KTIME_MAX) + ct->stall_time = ktime_get(); spin_unlock_irqrestore(&ctb->lock, flags); + if (unlikely(ct_deadlocked(ct))) + return -EPIPE; + if (msleep_interruptible(sleep_period_ms)) return -EINTR; sleep_period_ms = sleep_period_ms << 1; @@ -584,6 +627,8 @@ retry: goto retry; } + ct->stall_time = KTIME_MAX; + fence = ct_get_next_fence(ct); request.fence = fence; request.status = 0; @@ -646,6 +691,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, return -ENODEV; } + if (unlikely(ct->ctbs.send.broken)) + return -EPIPE; + if (flags & INTEL_GUC_CT_SEND_NB) return ct_send_nb(ct, action, len, flags); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 5bb8bef024c8..bee03794c1eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "intel_guc_fwif.h" @@ -68,6 +69,9 @@ struct intel_guc_ct { struct list_head incoming; /* incoming requests */ struct work_struct worker; /* handler for incoming requests */ } requests; + + /** @stall_time: time of first time a CTB submission is stalled */ + ktime_t stall_time; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); -- cgit v1.2.3 From 75452167a2794c302c7cfd98d3aaa374ec548fe0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 8 Jul 2021 09:20:54 -0700 Subject: drm/i915/guc: Optimize CTB writes and reads CTB writes are now in the path of command submission and should be optimized for performance. Rather than reading CTB descriptor values (e.g. head, tail) which could result in accesses across the PCIe bus, store shadow local copies and only read/write the descriptor values when absolutely necessary. Also store the current space in the each channel locally. 
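The point is that the descriptor lives in memory shared with the GuC (possibly across PCIe on discrete parts), so re-reading desc->head on every send is exactly what this avoids. Conceptually the send side becomes something like the sketch below (helper name invented here; CIRC_SPACE() is the kernel's one-slot-free circular buffer helper from <linux/circ_buf.h>, which matches the GuC rule that head == tail always means empty, never full):

    /* Sketch: reserve len_dw dwords using the cached view of the buffer. */
    static int h2g_reserve(struct intel_guc_ct_buffer *ctb, u32 len_dw)
    {
            if (ctb->space < len_dw) {
                    /* Only now pay for a read of the shared descriptor. */
                    u32 head = READ_ONCE(ctb->desc->head);

                    ctb->space = CIRC_SPACE(ctb->tail, head, ctb->size);
                    if (ctb->space < len_dw)
                            return -EBUSY;
            }

            ctb->space -= len_dw;
            return 0;
    }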
v2: (Michal) - Add additional sanity checks for head / tail pointers - Use GUC_CTB_HDR_LEN rather than magic 1 v3: (Michal / John H) - Drop redundant check of head value v4: (John H) - Drop redundant checks of tail / head values v5: (Michal) - Address more nits v6: (Michal) - Add GEM_BUG_ON sanity check on ctb->space Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-7-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 93 ++++++++++++++++++++----------- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 ++ 2 files changed, 67 insertions(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index db3e85b89573..ad33708c2818 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) { ctb->broken = false; + ctb->tail = 0; + ctb->head = 0; + ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size); + guc_ct_buffer_desc_init(ctb->desc); } @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct, { struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head; - u32 tail = desc->tail; + u32 tail = ctb->tail; u32 size = ctb->size; - u32 used; u32 header; u32 hxg; u32 type; @@ -396,25 +398,22 @@ static int ct_write(struct intel_guc_ct *ct, if (unlikely(desc->status)) goto corrupted; - if (unlikely((tail | head) >= size)) { - CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n", - head, tail, size); + GEM_BUG_ON(tail > size); + +#ifdef CONFIG_DRM_I915_DEBUG_GUC + if (unlikely(tail != READ_ONCE(desc->tail))) { + CT_ERROR(ct, "Tail was modified %u != %u\n", + desc->tail, tail); + desc->status |= GUC_CTB_STATUS_MISMATCH; + goto corrupted; + } + if (unlikely(READ_ONCE(desc->head) >= size)) { + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", + desc->head, size); desc->status |= GUC_CTB_STATUS_OVERFLOW; goto corrupted; } - - /* - * tail == head condition indicates empty. GuC FW does not support - * using up the entire buffer to get tail == head meaning full. - */ - if (tail < head) - used = (size - head) + tail; - else - used = tail - head; - - /* make sure there is a space including extra dw for the header */ - if (unlikely(used + len + GUC_CTB_HDR_LEN >= size)) - return -ENOSPC; +#endif /* * dw0: CT header (including fence) @@ -452,6 +451,11 @@ static int ct_write(struct intel_guc_ct *ct, */ write_barrier(ct); + /* update local copies */ + ctb->tail = tail; + GEM_BUG_ON(ctb->space < len + GUC_CTB_HDR_LEN); + ctb->space -= len + GUC_CTB_HDR_LEN; + /* now update descriptor */ WRITE_ONCE(desc->tail, tail); @@ -469,7 +473,7 @@ corrupted: * @req: pointer to pending request * @status: placeholder for status * - * For each sent request, Guc shall send bac CT response message. + * For each sent request, GuC shall send back CT response message. * Our message handler will update status of tracked request once * response message with given fence is received. Wait here and * check for valid response status value. 
@@ -525,24 +529,36 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct) return ret; } -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) { + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = READ_ONCE(desc->head); + u32 head; u32 space; - space = CIRC_SPACE(desc->tail, head, ctb->size); + if (ctb->space >= len_dw) + return true; + + head = READ_ONCE(desc->head); + if (unlikely(head > ctb->size)) { + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", + head, ctb->size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + ctb->broken = true; + return false; + } + + space = CIRC_SPACE(ctb->tail, head, ctb->size); + ctb->space = space; return space >= len_dw; } static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) { - struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; - lockdep_assert_held(&ct->ctbs.send.lock); - if (unlikely(!h2g_has_room(ctb, len_dw))) { + if (unlikely(!h2g_has_room(ct, len_dw))) { if (ct->stall_time == KTIME_MAX) ct->stall_time = ktime_get(); @@ -612,7 +628,7 @@ static int ct_send(struct intel_guc_ct *ct, */ retry: spin_lock_irqsave(&ctb->lock, flags); - if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) { + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { if (ct->stall_time == KTIME_MAX) ct->stall_time = ktime_get(); spin_unlock_irqrestore(&ctb->lock, flags); @@ -732,8 +748,8 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head; - u32 tail = desc->tail; + u32 head = ctb->head; + u32 tail = READ_ONCE(desc->tail); u32 size = ctb->size; u32 *cmds = ctb->cmds; s32 available; @@ -747,9 +763,19 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) if (unlikely(desc->status)) goto corrupted; - if (unlikely((tail | head) >= size)) { - CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n", - head, tail, size); + GEM_BUG_ON(head > size); + +#ifdef CONFIG_DRM_I915_DEBUG_GUC + if (unlikely(head != READ_ONCE(desc->head))) { + CT_ERROR(ct, "Head was modified %u != %u\n", + desc->head, head); + desc->status |= GUC_CTB_STATUS_MISMATCH; + goto corrupted; + } +#endif + if (unlikely(tail >= size)) { + CT_ERROR(ct, "Invalid tail offset %u >= %u)\n", + tail, size); desc->status |= GUC_CTB_STATUS_OVERFLOW; goto corrupted; } @@ -802,6 +828,9 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) } CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg); + /* update local copies */ + ctb->head = head; + /* now update descriptor */ WRITE_ONCE(desc->head, head); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index bee03794c1eb..edd1bba0445d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -33,6 +33,9 @@ struct intel_guc; * @desc: pointer to the buffer descriptor * @cmds: pointer to the commands buffer * @size: size of the commands buffer in dwords + * @head: local shadow copy of head in dwords + * @tail: local shadow copy of tail in dwords + * @space: local shadow copy of space in dwords * @broken: flag to indicate if descriptor data is broken */ struct intel_guc_ct_buffer { @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer { struct guc_ct_buffer_desc *desc; u32 *cmds; u32 size; + u32 tail; + u32 head; + u32 space; bool broken; }; -- 
cgit v1.2.3 From 3101e9952bd6fbe9b2ba8bf46d153dcfad77e579 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 8 Jul 2021 09:20:55 -0700 Subject: drm/i915/guc: Module load failure test for CT buffer creation Add several module failure load inject points in the CT buffer creation code path. Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210708162055.129996-8-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index ad33708c2818..83ec60ea3f89 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -175,6 +175,10 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, { int err; + err = i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO); + if (unlikely(err)) + return err; + err = guc_action_register_ct_buffer(ct_to_guc(ct), type, desc_addr, buff_addr, size); if (unlikely(err)) @@ -226,6 +230,10 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) u32 *cmds; int err; + err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO); + if (err) + return err; + GEM_BUG_ON(ct->vma); blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE; -- cgit v1.2.3 From 8f88ca76b3942d82e2c1cea8735ec368d89ecc15 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 13 Jul 2021 14:04:31 +0100 Subject: drm/i915/gtt: drop the page table optimisation We skip filling out the pt with scratch entries if the va range covers the entire pt, since we later have to fill it with the PTEs for the object pages anyway. However this might leave open a small window where the PTEs don't point to anything valid for the HW to consume. When for example using 2M GTT pages this fill_px() showed up as being quite significant in perf measurements, and ends up being completely wasted since we ignore the pt and just use the pde directly. Anyway, currently we have our PTE construction split between alloc and insert, which is probably slightly iffy nowadays, since the alloc doesn't actually allocate anything anymore, instead it just sets up the page directories and points the PTEs at the scratch page. Later when we do the insert step we re-program the PTEs again. Better might be to squash the alloc and insert into a single step, then bringing back this optimisation(along with some others) should be possible. 
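For reference, "filling with scratch entries" means writing the per-level scratch PTE encoding into every slot of the freshly allocated table, so that a premature hardware walk lands on a harmless page. Roughly, assuming a 4KiB table of 512 64-bit entries (a sketch of what fill_px() amounts to, not the driver code itself):

    /* Sketch: point every PTE of a new page table at the scratch page. */
    static void fill_with_scratch(u64 *table, u64 scratch_encode)
    {
            memset64(table, scratch_encode, 512);
    }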
Fixes: 14826673247e ("drm/i915: Only initialize partially filled pagetables") Signed-off-by: Matthew Auld Cc: Jon Bloomfield Cc: Chris Wilson Cc: Daniel Vetter Cc: # v4.15+ Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210713130431.2392740-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 3d02c726c746..6e0e52eeb87a 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -303,10 +303,7 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, __i915_gem_object_pin_pages(pt->base); i915_gem_object_make_unshrinkable(pt->base); - if (lvl || - gen8_pt_count(*start, end) < I915_PDES || - intel_vgpu_active(vm->i915)) - fill_px(pt, vm->scratch[lvl]->encode); + fill_px(pt, vm->scratch[lvl]->encode); spin_lock(&pd->lock); if (likely(!pd->entry[idx])) { -- cgit v1.2.3 From db47fe727e1fc516cf60fc9ab8299605ef3c2d54 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 13 Jul 2021 12:36:24 -0700 Subject: drm/i915/step: s/_revid_tbl/_revids Simplify the stepping info array name. Cc: Jani Nikula Signed-off-by: Anusha Srivatsa Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_step.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index ba9479a67521..93ccd42f2514 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -26,7 +26,7 @@ static const struct intel_step_info kbl_revids[] = { [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 }, }; -static const struct intel_step_info tgl_uy_revid_step_tbl[] = { +static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, @@ -34,12 +34,12 @@ static const struct intel_step_info tgl_uy_revid_step_tbl[] = { }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ -static const struct intel_step_info tgl_revid_step_tbl[] = { +static const struct intel_step_info tgl_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 }, }; -static const struct intel_step_info adls_revid_step_tbl[] = { +static const struct intel_step_info adls_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, @@ -47,7 +47,7 @@ static const struct intel_step_info adls_revid_step_tbl[] = { [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 }, }; -static const struct intel_step_info adlp_revid_step_tbl[] = { +static const struct intel_step_info adlp_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, @@ -62,17 +62,17 @@ void intel_step_init(struct drm_i915_private *i915) struct intel_step_info step = {}; if (IS_ALDERLAKE_P(i915)) { - revids = adlp_revid_step_tbl; - size = ARRAY_SIZE(adlp_revid_step_tbl); + revids = adlp_revids; 
+ size = ARRAY_SIZE(adlp_revids); } else if (IS_ALDERLAKE_S(i915)) { - revids = adls_revid_step_tbl; - size = ARRAY_SIZE(adls_revid_step_tbl); + revids = adls_revids; + size = ARRAY_SIZE(adls_revids); } else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) { - revids = tgl_uy_revid_step_tbl; - size = ARRAY_SIZE(tgl_uy_revid_step_tbl); + revids = tgl_uy_revids; + size = ARRAY_SIZE(tgl_uy_revids); } else if (IS_TIGERLAKE(i915)) { - revids = tgl_revid_step_tbl; - size = ARRAY_SIZE(tgl_revid_step_tbl); + revids = tgl_revids; + size = ARRAY_SIZE(tgl_revids); } else if (IS_KABYLAKE(i915)) { revids = kbl_revids; size = ARRAY_SIZE(kbl_revids); -- cgit v1.2.3 From c314b693954075791ed11dce3c68f920409b5de4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:25 -0700 Subject: drm/i915: Make pre-production detection use direct revid comparison Although we're converting our workarounds to use a revid->stepping lookup table, the function that detects pre-production hardware should continue to compare against PCI revision ID values directly. These are listed in the bspec as integers, so it's easier to confirm their correctness if we just use an integer literal rather than a symbolic name anyway. Bspec: 13620, 19131, 13626, 18329 Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 73de45472f60..f1dcee75751f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -271,10 +271,10 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) bool pre = false; pre |= IS_HSW_EARLY_SDV(dev_priv); - pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); - pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); - pre |= IS_KBL_GT_STEP(dev_priv, 0, STEP_A0); - pre |= IS_GLK_REVID(dev_priv, 0, GLK_REVID_A2); + pre |= IS_SKYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x6; + pre |= IS_BROXTON(dev_priv) && INTEL_REVID(dev_priv) < 0xA; + pre |= IS_KABYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x1; + pre |= IS_GEMINILAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x3; if (pre) { drm_err(&dev_priv->drm, "This is a pre-production stepping. " -- cgit v1.2.3 From 0f93f5da1cdc40d78fa2df8a62168e2362a0b34e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:26 -0700 Subject: drm/i915/skl: Use revid->stepping tables Switch SKL to use a revid->stepping table as we're trying to do on all platforms going forward. Also drop the preproduction revisions and add the newer steppings we hadn't already handled. Note that SKL has a case where a newer revision ID corresponds to an older GT/disp stepping (0x9 -> STEP_J0, 0xA -> STEP_I1). Also, the lack of a revision ID 0x8 in the table is intentional and not an oversight. We'll re-write the KBL-specific comment to make it clear that these kind of quirks are expected. v2: - Since GT and display steppings are always identical on SKL use a macro to set both values at once in a more readable manner. (Anusha) - Drop preproduction steppings. 
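The pay-off is that stepping bounds compare monotonically increasing enum values rather than raw revision IDs, which stop being monotonic on SKL (0x9 is J0, 0xA is the older I1). Roughly, with illustrative glue around the table and the workaround touched by this series:

    /* Illustrative: resolve the stepping once at init time (bounds and
     * holes are validated in the real intel_step_init()), then let
     * workarounds compare the enum values.
     */
    struct intel_step_info step = skl_revids[revid];

    /* e.g. WaInPlaceDecompressionHang applies from stepping H0 onwards: */
    if (step.gt_step >= STEP_H0)
            wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA,
                        GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);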
Bspec: 13626 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 11 +---------- drivers/gpu/drm/i915/intel_step.c | 30 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_step.h | 4 ++++ 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b62d1e31a645..dc55f6562dd6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -882,7 +882,7 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) + if (IS_SKL_GT_STEP(i915, STEP_H0, STEP_FOREVER)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38ff2fb89744..8e9293f84e20 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1456,16 +1456,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_TGL_Y(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) -#define SKL_REVID_A0 0x0 -#define SKL_REVID_B0 0x1 -#define SKL_REVID_C0 0x2 -#define SKL_REVID_D0 0x3 -#define SKL_REVID_E0 0x4 -#define SKL_REVID_F0 0x5 -#define SKL_REVID_G0 0x6 -#define SKL_REVID_H0 0x7 - -#define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until)) +#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until)) #define BXT_REVID_A0 0x0 #define BXT_REVID_A1 0x1 diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 93ccd42f2514..4e6a2b3b4f8a 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -7,14 +7,31 @@ #include "intel_step.h" /* - * KBL revision ID ordering is bizarre; higher revision ID's map to lower - * steppings in some cases. So rather than test against the revision ID - * directly, let's map that into our own range of increasing ID's that we - * can test against in a regular manner. + * Some platforms have unusual ways of mapping PCI revision ID to GT/display + * steppings. E.g., in some cases a higher PCI revision may translate to a + * lower stepping of the GT and/or display IP. This file provides lookup + * tables to map the PCI revision into a standard set of stepping values that + * can be compared numerically. + * + * Also note that some revisions/steppings may have been set aside as + * placeholders but never materialized in real hardware; in those cases there + * may be jumps in the revision IDs or stepping values in the tables below. */ +/* + * Some platforms always have the same stepping value for GT and display; + * use a macro to define these to make it easier to identify the platforms + * where the two steppings can deviate. 
+ */ +#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x + +static const struct intel_step_info skl_revids[] = { + [0x6] = { COMMON_STEP(G0) }, + [0x7] = { COMMON_STEP(H0) }, + [0x9] = { COMMON_STEP(J0) }, + [0xA] = { COMMON_STEP(I1) }, +}; -/* FIXME: what about REVID_E0 */ static const struct intel_step_info kbl_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, @@ -76,6 +93,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_KABYLAKE(i915)) { revids = kbl_revids; size = ARRAY_SIZE(kbl_revids); + } else if (IS_SKYLAKE(i915)) { + revids = skl_revids; + size = ARRAY_SIZE(skl_revids); } /* Not using the stepping scheme for the platform yet. */ diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index 958a8bb5d677..88a77159703e 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -31,6 +31,10 @@ enum intel_step { STEP_E0, STEP_F0, STEP_G0, + STEP_H0, + STEP_I0, + STEP_I1, + STEP_J0, STEP_FUTURE, STEP_FOREVER, }; -- cgit v1.2.3 From 6eea6f16e697903b4aad39ee1fff4fa4d74e3a63 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:27 -0700 Subject: drm/i915/kbl: Drop pre-production revision from stepping table We're long past the point where we need to care about pre-production hardware, and we already warn the user and taint the kernel if we detect the driver is being loaded on pre-production hardware. Bspec: 18329 Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_step.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 4e6a2b3b4f8a..1dd6944e7aca 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -33,7 +33,6 @@ static const struct intel_step_info skl_revids[] = { }; static const struct intel_step_info kbl_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 }, -- cgit v1.2.3 From fd51fa8ac63835b99c084f6ef8264b6e322b7034 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:28 -0700 Subject: drm/i915/bxt: Use revid->stepping tables Switch BXT to use a revid->stepping table as we're trying to do on all platforms going forward. Note that the REVID macros we had before weren't being used anywhere in the code and weren't even correct; the table values come from the bspec (and omits all the placeholder and preproduction revisions). Although nothing in the code is using the data from this table at the moment, we expect some upcoming DMC patches to start utilizing it. 
Bspec: 13620 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 9 --------- drivers/gpu/drm/i915/intel_step.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8e9293f84e20..8030bd6931b5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1458,15 +1458,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until)) -#define BXT_REVID_A0 0x0 -#define BXT_REVID_A1 0x1 -#define BXT_REVID_B0 0x3 -#define BXT_REVID_B_LAST 0x8 -#define BXT_REVID_C0 0x9 - -#define IS_BXT_REVID(dev_priv, since, until) \ - (IS_BROXTON(dev_priv) && IS_REVID(dev_priv, since, until)) - #define IS_KBL_GT_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until)) #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 1dd6944e7aca..57c33a25b760 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -42,6 +42,13 @@ static const struct intel_step_info kbl_revids[] = { [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 }, }; +static const struct intel_step_info bxt_revids[] = { + [0xA] = { COMMON_STEP(C0) }, + [0xB] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, + [0xD] = { COMMON_STEP(E0) }, +}; + static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, @@ -89,6 +96,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_TIGERLAKE(i915)) { revids = tgl_revids; size = ARRAY_SIZE(tgl_revids); + } else if (IS_BROXTON(i915)) { + revids = bxt_revids; + size = ARRAY_SIZE(bxt_revids); } else if (IS_KABYLAKE(i915)) { revids = kbl_revids; size = ARRAY_SIZE(kbl_revids); -- cgit v1.2.3 From 3dd22d46c7f6ddfb8c5e5d7c45649cd922bdd8cb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:29 -0700 Subject: drm/i915/glk: Use revid->stepping tables Switch GLK to use a revid->stepping table as we're trying to do on all platforms going forward. Pre-production and placeholder revisions are omitted. Although nothing in the code is using the data from this table at the moment, we expect some upcoming DMC patches to start utilizing it. 
Bspec: 19131 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 8 -------- drivers/gpu/drm/i915/intel_step.c | 7 +++++++ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8030bd6931b5..60a33c505a24 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1463,14 +1463,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until)) -#define GLK_REVID_A0 0x0 -#define GLK_REVID_A1 0x1 -#define GLK_REVID_A2 0x2 -#define GLK_REVID_B0 0x3 - -#define IS_GLK_REVID(dev_priv, since, until) \ - (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) - #define CNL_REVID_A0 0x0 #define CNL_REVID_B0 0x1 #define CNL_REVID_C0 0x2 diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 57c33a25b760..1bc0701092ab 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -49,6 +49,10 @@ static const struct intel_step_info bxt_revids[] = { [0xD] = { COMMON_STEP(E0) }, }; +static const struct intel_step_info glk_revids[] = { + [3] = { COMMON_STEP(B0) }, +}; + static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, @@ -96,6 +100,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_TIGERLAKE(i915)) { revids = tgl_revids; size = ARRAY_SIZE(tgl_revids); + } else if (IS_GEMINILAKE(i915)) { + revids = glk_revids; + size = ARRAY_SIZE(glk_revids); } else if (IS_BROXTON(i915)) { revids = bxt_revids; size = ARRAY_SIZE(bxt_revids); -- cgit v1.2.3 From cc7a3393f2888726ad86f229d15543e6145d915f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:30 -0700 Subject: drm/i915/icl: Use revid->stepping tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch ICL to use a revid->stepping table as we're trying to do on all platforms going forward. While we're at it, let's include some additional steppings that have popped up, even if we don't yet have any workarounds tied to those steppings (we probably need to audit our workaround list soon to see if any of the bounds have moved or if new workarounds have appeared). Note that the current bspec table is missing information about how to map PCI revision ID to GT/display steppings; it only provides an SoC stepping. The mapping to GT/display steppings (which aren't always the same as the SoC stepping) used to be in the bspec, but was apparently dropped during an update in Nov 2019; I've made my changes here based on an older bspec snapshot that still had the necessary information. We've requested that the missing information be restored. I'm only including the production revids in the table here since we're past the point at which we usually stop trying to support pre-production hardware. An appropriate check is added to intel_detect_preproduction_hw() to print an error and taint the kernel just in case someone still tries to load the driver on old pre-production hardware. v2: - Drop pre-production steppings and add error/taint at startup when loading on pre-production hardware. 
Bspec: 21141 # pre-Nov 2019 snapshot Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 12 ++++++------ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 10 ++-------- drivers/gpu/drm/i915/intel_step.c | 7 +++++++ 4 files changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index dc55f6562dd6..3d78b95053da 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -557,7 +557,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1604370585:icl (pre-prod) * Formerly known as WaPushConstantDereferenceHoldDisable */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) + if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_B0)) wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); @@ -573,12 +573,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_2006611047:icl (pre-prod) * Formerly known as WaDisableImprovedTdlClkGating */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) + if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_A0)) wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN11_TDL_CLOCK_GATING_FIX_DISABLE); /* Wa_2006665173:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) + if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_A0)) wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); @@ -1058,13 +1058,13 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMW_ECO_DEV_CTX_RELOAD_DISABLE); /* Wa_1405779004:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) + if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, MSCUNIT_CLKGATE_DIS); /* Wa_1406838659:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) + if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, INF_UNIT_LEVEL_CLKGATE, CGPSF_CLKGATE_DIS); @@ -1743,7 +1743,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) PMFLUSHDONE_LNEBLK); /* Wa_1406609255:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) + if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, GEN7_SARCHKMD, GEN7_DISABLE_DEMAND_PREFETCH); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f1dcee75751f..05e043dfeb1d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -275,6 +275,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_BROXTON(dev_priv) && INTEL_REVID(dev_priv) < 0xA; pre |= IS_KABYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x1; pre |= IS_GEMINILAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x3; + pre |= IS_ICELAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x7; if (pre) { drm_err(&dev_priv->drm, "This is a pre-production stepping. 
" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 60a33c505a24..9e1e68881921 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1470,14 +1470,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) -#define ICL_REVID_A0 0x0 -#define ICL_REVID_A2 0x1 -#define ICL_REVID_B0 0x3 -#define ICL_REVID_B2 0x4 -#define ICL_REVID_C0 0x5 - -#define IS_ICL_REVID(p, since, until) \ - (IS_ICELAKE(p) && IS_REVID(p, since, until)) +#define IS_ICL_GT_STEP(p, since, until) \ + (IS_ICELAKE(p) && IS_GT_STEP(p, since, until)) #define EHL_REVID_A0 0x0 #define EHL_REVID_B0 0x1 diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 1bc0701092ab..9ce032993a99 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -53,6 +53,10 @@ static const struct intel_step_info glk_revids[] = { [3] = { COMMON_STEP(B0) }, }; +static const struct intel_step_info icl_revids[] = { + [7] = { COMMON_STEP(D0) }, +}; + static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, @@ -100,6 +104,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_TIGERLAKE(i915)) { revids = tgl_revids; size = ARRAY_SIZE(tgl_revids); + } else if (IS_ICELAKE(i915)) { + revids = icl_revids; + size = ARRAY_SIZE(icl_revids); } else if (IS_GEMINILAKE(i915)) { revids = glk_revids; size = ARRAY_SIZE(glk_revids); -- cgit v1.2.3 From 61b2dc4b58688d61237edfdc6045e570fd05fd25 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:31 -0700 Subject: drm/i915/jsl_ehl: Use revid->stepping tables Switch JSL/EHL to use a revid->stepping table as we're trying to do on all platforms going forward. v2: - Use COMMON_STEP(). 
(Anusha) Bspec: 29153 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-9-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 9 ++++----- drivers/gpu/drm/i915/intel_step.c | 8 ++++++++ 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 71ac57670043..9b5324d015ec 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2674,7 +2674,7 @@ static bool ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && - IS_JSL_EHL_REVID(i915, EHL_REVID_B0, REVID_FOREVER)) || + IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || IS_TIGERLAKE(i915)) && i915->dpll.ref_clks.nssc == 38400; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3d78b95053da..c376765bf758 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1078,7 +1078,7 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) + IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9e1e68881921..45c2f2693933 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1473,11 +1473,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_GT_STEP(p, since, until) \ (IS_ICELAKE(p) && IS_GT_STEP(p, since, until)) -#define EHL_REVID_A0 0x0 -#define EHL_REVID_B0 0x1 - -#define IS_JSL_EHL_REVID(p, since, until) \ - (IS_JSL_EHL(p) && IS_REVID(p, since, until)) +#define IS_JSL_EHL_GT_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until)) +#define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until)) #define IS_TGL_DISPLAY_STEP(__i915, since, until) \ (IS_TIGERLAKE(__i915) && \ diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 9ce032993a99..9de17bdfe62f 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -57,6 +57,11 @@ static const struct intel_step_info icl_revids[] = { [7] = { COMMON_STEP(D0) }, }; +static const struct intel_step_info jsl_ehl_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, @@ -104,6 +109,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_TIGERLAKE(i915)) { revids = tgl_revids; size = ARRAY_SIZE(tgl_revids); + } else if (IS_JSL_EHL(i915)) { + revids = jsl_ehl_revids; + size = ARRAY_SIZE(jsl_ehl_revids); } else if (IS_ICELAKE(i915)) { revids = icl_revids; size = ARRAY_SIZE(icl_revids); -- cgit v1.2.3 From 97cf9b58153985929ffb31de57fce9b1323fe283 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:32 -0700 Subject: drm/i915/rkl: Use revid->stepping tables Switch 
RKL to use a revid->stepping table as we're trying to do on all platforms going forward. Bspec: 44501 Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-10-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 8 ++------ drivers/gpu/drm/i915/intel_step.c | 9 +++++++++ 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 77865cf6641f..4318999248b8 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -550,7 +550,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) if (intel_dp->psr.psr2_sel_fetch_enabled) { /* WA 1408330847 */ if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || - IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0)) + IS_RKL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, DIS_RAM_BYPASS_PSR2_MAN_TRACK, DIS_RAM_BYPASS_PSR2_MAN_TRACK); @@ -1221,7 +1221,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) /* WA 1408330847 */ if (intel_dp->psr.psr2_sel_fetch_enabled && (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || - IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0))) + IS_RKL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0))) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, DIS_RAM_BYPASS_PSR2_MAN_TRACK, 0); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45c2f2693933..bc6a8967bba1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1490,12 +1490,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ IS_GT_STEP(__i915, since, until)) -#define RKL_REVID_A0 0x0 -#define RKL_REVID_B0 0x1 -#define RKL_REVID_C0 0x4 - -#define IS_RKL_REVID(p, since, until) \ - (IS_ROCKETLAKE(p) && IS_REVID(p, since, until)) +#define IS_RKL_DISPLAY_STEP(p, since, until) \ + (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until)) #define DG1_REVID_A0 0x0 #define DG1_REVID_B0 0x1 diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 9de17bdfe62f..93edfbef2903 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -75,6 +75,12 @@ static const struct intel_step_info tgl_revids[] = { [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 }, }; +static const struct intel_step_info rkl_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, + [4] = { COMMON_STEP(C0) }, +}; + static const struct intel_step_info adls_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, @@ -103,6 +109,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_ALDERLAKE_S(i915)) { revids = adls_revids; size = ARRAY_SIZE(adls_revids); + } else if (IS_ROCKETLAKE(i915)) { + revids = rkl_revids; + size = ARRAY_SIZE(rkl_revids); } else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) { revids = tgl_uy_revids; size = ARRAY_SIZE(tgl_uy_revids); -- cgit v1.2.3 From dae751f40c1913751bbdaed18224ff707f562319 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:33 -0700 Subject: drm/i915/dg1: Use revid->stepping tables Switch DG1 to use a revid->stepping table as we're trying to do on all platforms going forward. 
This removes the last use of IS_REVID() and REVID_FOREVER, so remove those now-unused macros as well to prevent their accidental use on future platforms. v2: - Use COMMON_STEP() macro in table. (Anusha) Bspec: 44463 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-11-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 11 ++++++----- drivers/gpu/drm/i915/i915_drv.h | 18 ++++-------------- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_step.c | 8 ++++++++ 6 files changed, 21 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 4298ae684d7d..6a58959741cd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -5798,7 +5798,7 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) int config, i; if (IS_ALDERLAKE_S(dev_priv) || - IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0) || + IS_DG1_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) /* Wa_1409767108:tgl,dg1,adl-s */ table = wa_1409767108_buddy_page_masks; diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index f7366b054f8e..82f39561f6b6 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -157,7 +157,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if (!IS_DG1_REVID(uncore->i915, DG1_REVID_A0, DG1_REVID_B0)) + if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_B0)) return false; *start = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c376765bf758..78fd58c1eb6d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1146,7 +1146,7 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1607087056:dg1 */ - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0)) + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1542,7 +1542,7 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) && + if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_A0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), @@ -1612,7 +1612,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0) || IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] @@ -1656,7 +1656,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_ALDERLAKE_S(i915) || + IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0) 
|| IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, @@ -1670,7 +1671,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bc6a8967bba1..d93159f3b419 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1264,19 +1264,10 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) -#define REVID_FOREVER 0xff #define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) #define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) -/* - * Return true if revision is in range [since,until] inclusive. - * - * Use 0 for open-ended since, and REVID_FOREVER for open-ended until. - */ -#define IS_REVID(p, since, until) \ - (INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until)) - #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step) @@ -1493,11 +1484,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_RKL_DISPLAY_STEP(p, since, until) \ (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until)) -#define DG1_REVID_A0 0x0 -#define DG1_REVID_B0 0x1 - -#define IS_DG1_REVID(p, since, until) \ - (IS_DG1(p) && IS_REVID(p, since, until)) +#define IS_DG1_GT_STEP(p, since, until) \ + (IS_DG1(p) && IS_GT_STEP(p, since, until)) +#define IS_DG1_DISPLAY_STEP(p, since, until) \ + (IS_DG1(p) && IS_DISPLAY_STEP(p, since, until)) #define IS_ADLS_DISPLAY_STEP(__i915, since, until) \ (IS_ALDERLAKE_S(__i915) && \ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 45fefa0ed160..699c15704e1d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7383,7 +7383,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) gen12lp_init_clock_gating(dev_priv); /* Wa_1409836686:dg1[a0] */ - if (IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0)) + if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_A0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | DPT_GATING_DIS); } diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 93edfbef2903..9fcf17708cc8 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -81,6 +81,11 @@ static const struct intel_step_info rkl_revids[] = { [4] = { COMMON_STEP(C0) }, }; +static const struct intel_step_info dg1_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + static const struct intel_step_info adls_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, @@ -109,6 +114,9 @@ void intel_step_init(struct drm_i915_private *i915) } else if (IS_ALDERLAKE_S(i915)) { revids = adls_revids; size = ARRAY_SIZE(adls_revids); + } else if (IS_DG1(i915)) { + revids = dg1_revids; + size = ARRAY_SIZE(dg1_revids); } else if (IS_ROCKETLAKE(i915)) { revids = rkl_revids; size = ARRAY_SIZE(rkl_revids); -- cgit v1.2.3 From 41eb74d51db7a889c074255f5e9028731c3669a6 Mon Sep 17 00:00:00 2001 From: 
Matt Roper Date: Tue, 13 Jul 2021 12:36:34 -0700 Subject: drm/i915/cnl: Drop all workarounds All of the Cannon Lake hardware that came out had graphics fused off, and our userspace drivers have already dropped their support for the platform; CNL-specific code in i915 that isn't inherited by subsequent platforms is effectively dead code. Let's remove all of the CNL-specific workarounds as a quick and easy first step. References: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6899 Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-12-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 57 ----------------------------- drivers/gpu/drm/i915/i915_drv.h | 7 ---- 2 files changed, 64 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 78fd58c1eb6d..e9dc2d571432 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -514,35 +514,6 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } -static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) -{ - /* WaForceContextSaveRestoreNonCoherent:cnl */ - wa_masked_en(wal, CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - wa_masked_en(wal, COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix:cnl */ - wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - wa_masked_field_set(wal, GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaDisableEarlyEOT:cnl */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); -} - static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -703,8 +674,6 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, gen12_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 11) icl_ctx_workarounds_init(engine, wal); - else if (IS_CANNONLAKE(i915)) - cnl_ctx_workarounds_init(engine, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_ctx_workarounds_init(engine, wal); else if (IS_GEMINILAKE(i915)) @@ -1015,17 +984,6 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); } -static void -cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - wa_init_mcr(i915, wal); - - /* WaInPlaceDecompressionHang:cnl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - static void icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -1175,8 +1133,6 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); else if (GRAPHICS_VER(i915) == 11) icl_gt_workarounds_init(i915, wal); - else if (IS_CANNONLAKE(i915)) - cnl_gt_workarounds_init(i915, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) 
cfl_gt_workarounds_init(i915, wal); else if (IS_GEMINILAKE(i915)) @@ -1438,17 +1394,6 @@ static void cml_whitelist_build(struct intel_engine_cs *engine) cfl_whitelist_build(engine); } -static void cnl_whitelist_build(struct intel_engine_cs *engine) -{ - struct i915_wa_list *w = &engine->whitelist; - - if (engine->class != RENDER_CLASS) - return; - - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - whitelist_reg(w, GEN8_CS_CHICKEN1); -} - static void icl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1562,8 +1507,6 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) tgl_whitelist_build(engine); else if (GRAPHICS_VER(i915) == 11) icl_whitelist_build(engine); - else if (IS_CANNONLAKE(i915)) - cnl_whitelist_build(engine); else if (IS_COMETLAKE(i915)) cml_whitelist_build(engine); else if (IS_COFFEELAKE(i915)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d93159f3b419..5e640e2a6261 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1454,13 +1454,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until)) -#define CNL_REVID_A0 0x0 -#define CNL_REVID_B0 0x1 -#define CNL_REVID_C0 0x2 - -#define IS_CNL_REVID(p, since, until) \ - (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) - #define IS_ICL_GT_STEP(p, since, until) \ (IS_ICELAKE(p) && IS_GT_STEP(p, since, until)) -- cgit v1.2.3 From eee42141e498fa3df3ce524846d52f67a92b6845 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 12:36:35 -0700 Subject: drm/i915/icl: Drop workarounds that only apply to pre-production steppings We're past the point at which we usually drop workarounds that were never needed on production hardware. The driver will already print an error and apply taint if loaded on pre-production hardware. 
Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20210713193635.3390052-13-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 39 ----------------------------- drivers/gpu/drm/i915/i915_drv.h | 3 --- 2 files changed, 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e9dc2d571432..2d531b43475c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -517,21 +517,12 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; - /* WaDisableBankHangMode:icl */ wa_write(wal, GEN8_L3CNTLREG, intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL); - /* Wa_1604370585:icl (pre-prod) - * Formerly known as WaPushConstantDereferenceHoldDisable - */ - if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_B0)) - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - PUSH_CONSTANT_DEREF_DISABLE); - /* WaForceEnableNonCoherent:icl * This is not the same workaround as in early Gen9 platforms, where * lacking this could cause system hangs, but coherency performance @@ -541,18 +532,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); - /* Wa_2006611047:icl (pre-prod) - * Formerly known as WaDisableImprovedTdlClkGating - */ - if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_A0)) - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN11_TDL_CLOCK_GATING_FIX_DISABLE); - - /* Wa_2006665173:icl (pre-prod) */ - if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_A0)) - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); - /* WaEnableFloatBlendOptimization:icl */ wa_write_clr_set(wal, GEN10_CACHE_MODE_SS, @@ -1015,18 +994,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_GAMW_ECO_DEV_RW_IA, GAMW_ECO_DEV_CTX_RELOAD_DISABLE); - /* Wa_1405779004:icl (pre-prod) */ - if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_A0)) - wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, - MSCUNIT_CLKGATE_DIS); - - /* Wa_1406838659:icl (pre-prod) */ - if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_B0)) - wa_write_or(wal, - INF_UNIT_LEVEL_CLKGATE, - CGPSF_CLKGATE_DIS); - /* Wa_1406463099:icl * Formerly known as WaGamTlbPendError */ @@ -1686,12 +1653,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) PMFLUSH_GAPL3UNBLOCK | PMFLUSHDONE_LNEBLK); - /* Wa_1406609255:icl (pre-prod) */ - if (IS_ICL_GT_STEP(i915, STEP_A0, STEP_B0)) - wa_write_or(wal, - GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH); - /* Wa_1606682166:icl */ wa_write_or(wal, GEN7_SARCHKMD, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5e640e2a6261..c6d245367b03 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1454,9 +1454,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until)) -#define IS_ICL_GT_STEP(p, since, until) \ - (IS_ICELAKE(p) && IS_GT_STEP(p, since, until)) - #define IS_JSL_EHL_GT_STEP(p, since, until) \ (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until)) #define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \ -- cgit v1.2.3 From 93b713304188844b8514074dc13ffd56d12235d3 Mon Sep 17 
00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:15 -0500 Subject: drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser" This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser"). The justification for this commit in the git history was a vague comment about getting it out from under the struct_mutex. While this may improve perf for some workloads on Gen7 platforms where we rely on the command parser for features such as indirect rendering, no numbers were provided to prove such an improvement. It claims to closed two gitlab/bugzilla issues but with no explanation whatsoever as to why or what bug it's fixing. Meanwhile, by moving command parsing off to an async callback, it leaves us with a problem of what to do on error. When things were synchronous, EXECBUFFER2 would fail with an error code if parsing failed. When moving it to async, we needed another way to handle that error and the solution employed was to set an error on the dma_fence and then trust that said error gets propagated to the client eventually. Moving back to synchronous will help us untangle the fence error propagation mess. This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser pinning to execbuffer") which is a refactor of some of our allocation paths for asynchronous parsing. Now that everything is synchronous, we don't need it. v2 (Daniel Vetter): - Add stabel Cc and Fixes tag Signed-off-by: Jason Ekstrand Cc: # v5.6+ Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Cc: Maarten Lankhorst Reviewed-by: Jon Bloomfield Acked-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-2-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 227 ++------------------- .../drm/i915/gem/selftests/i915_gem_execbuffer.c | 4 + drivers/gpu/drm/i915/i915_cmd_parser.c | 132 ++++++------ drivers/gpu/drm/i915/i915_drv.h | 7 +- 4 files changed, 91 insertions(+), 279 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cb493f1fcbe7..6ad166fa73e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,10 +25,8 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" -#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -1460,6 +1458,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, int err; struct intel_engine_cs *engine = eb->engine; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + if (!reloc_can_use_engine(engine)) { engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; if (!engine) @@ -2374,217 +2376,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, return vma; } -struct eb_parse_work { - struct dma_fence_work base; - struct intel_engine_cs *engine; - struct i915_vma *batch; - struct i915_vma *shadow; - struct i915_vma *trampoline; - unsigned long batch_offset; - unsigned long batch_length; - unsigned long *jump_whitelist; - const void *batch_map; - void *shadow_map; -}; - -static int __eb_parse(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - int ret; - bool cookie; - - cookie = dma_fence_begin_signalling(); - ret = 
intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->jump_whitelist, - pw->shadow_map, - pw->batch_map); - dma_fence_end_signalling(cookie); - - return ret; -} - -static void __eb_parse_release(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - - if (!IS_ERR_OR_NULL(pw->jump_whitelist)) - kfree(pw->jump_whitelist); - - if (pw->batch_map) - i915_gem_object_unpin_map(pw->batch->obj); - else - i915_gem_object_unpin_pages(pw->batch->obj); - - i915_gem_object_unpin_map(pw->shadow->obj); - - if (pw->trampoline) - i915_active_release(&pw->trampoline->active); - i915_active_release(&pw->shadow->active); - i915_active_release(&pw->batch->active); -} - -static const struct dma_fence_work_ops eb_parse_ops = { - .name = "eb_parse", - .work = __eb_parse, - .release = __eb_parse_release, -}; - -static inline int -__parser_mark_active(struct i915_vma *vma, - struct intel_timeline *tl, - struct dma_fence *fence) -{ - struct intel_gt_buffer_pool_node *node = vma->private; - - return i915_active_ref(&node->active, tl->fence_context, fence); -} - -static int -parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) -{ - int err; - - mutex_lock(&tl->mutex); - - err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); - if (err) - goto unlock; - - if (pw->trampoline) { - err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); - if (err) - goto unlock; - } - -unlock: - mutex_unlock(&tl->mutex); - return err; -} - -static int eb_parse_pipeline(struct i915_execbuffer *eb, - struct i915_vma *shadow, - struct i915_vma *trampoline) -{ - struct eb_parse_work *pw; - struct drm_i915_gem_object *batch = eb->batch->vma->obj; - bool needs_clflush; - int err; - - GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); - GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); - - pw = kzalloc(sizeof(*pw), GFP_KERNEL); - if (!pw) - return -ENOMEM; - - err = i915_active_acquire(&eb->batch->vma->active); - if (err) - goto err_free; - - err = i915_active_acquire(&shadow->active); - if (err) - goto err_batch; - - if (trampoline) { - err = i915_active_acquire(&trampoline->active); - if (err) - goto err_shadow; - } - - pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); - if (IS_ERR(pw->shadow_map)) { - err = PTR_ERR(pw->shadow_map); - goto err_trampoline; - } - - needs_clflush = - !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - pw->batch_map = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) - pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); - - if (IS_ERR(pw->batch_map)) { - err = i915_gem_object_pin_pages(batch); - if (err) - goto err_unmap_shadow; - pw->batch_map = NULL; - } - - pw->jump_whitelist = - intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, - trampoline); - if (IS_ERR(pw->jump_whitelist)) { - err = PTR_ERR(pw->jump_whitelist); - goto err_unmap_batch; - } - - dma_fence_work_init(&pw->base, &eb_parse_ops); - - pw->engine = eb->engine; - pw->batch = eb->batch->vma; - pw->batch_offset = eb->batch_start_offset; - pw->batch_length = eb->batch_len; - pw->shadow = shadow; - pw->trampoline = trampoline; - - /* Mark active refs early for this worker, in case we get interrupted */ - err = parser_mark_active(pw, eb->context->timeline); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(pw->batch->resv, 1); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(shadow->resv, 1); - if (err) 
- goto err_commit; - - /* Wait for all writes (and relocs) into the batch to complete */ - err = i915_sw_fence_await_reservation(&pw->base.chain, - pw->batch->resv, NULL, false, - 0, I915_FENCE_GFP); - if (err < 0) - goto err_commit; - - /* Keep the batch alive and unwritten as we parse */ - dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - - /* Force execution to wait for completion of the parser */ - dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - - dma_fence_work_commit_imm(&pw->base); - return 0; - -err_commit: - i915_sw_fence_set_error_once(&pw->base.chain, err); - dma_fence_work_commit_imm(&pw->base); - return err; - -err_unmap_batch: - if (pw->batch_map) - i915_gem_object_unpin_map(batch); - else - i915_gem_object_unpin_pages(batch); -err_unmap_shadow: - i915_gem_object_unpin_map(shadow->obj); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); -err_shadow: - i915_active_release(&shadow->active); -err_batch: - i915_active_release(&eb->batch->vma->active); -err_free: - kfree(pw); - return err; -} - static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) { /* @@ -2674,7 +2465,15 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = eb_parse_pipeline(eb, shadow, trampoline); + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_trampoline; + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->vma, + eb->batch_start_offset, + eb->batch_len, + shadow, trampoline); if (err) goto err_unpin_batch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 4df505e4c53a..16162fc2782d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg) intel_gt_pm_get(&eb.i915->gt); for_each_uabi_engine(eb.engine, eb.i915) { + if (intel_engine_requires_cmd_parser(eb.engine) || + intel_engine_using_cmd_parser(eb.engine)) + continue; + reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 3992c25a191d..00ec618d0159 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, unsigned long offset, unsigned long length, - void *dst, const void *src) + bool *needs_clflush_after) { - bool needs_clflush = - !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - if (src) { - GEM_BUG_ON(!needs_clflush); - i915_unaligned_memcpy_from_wc(dst, src + offset, length); - } else { - struct scatterlist *sg; + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; + int ret; + + ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); + if (ret) + return ERR_PTR(ret); + + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + i915_gem_object_finish_access(dst_obj); + if (IS_ERR(dst)) + return dst; + + ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + if (ret) { + i915_gem_object_unpin_map(dst_obj); + return ERR_PTR(ret); + } + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && i915_has_memcpy_from_wc()) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + 
i915_unaligned_memcpy_from_wc(dst, + src + offset, + length); + i915_gem_object_unpin_map(src_obj); + } + } + if (IS_ERR(src)) { + unsigned long x, n, remain; void *ptr; - unsigned int x, sg_ofs; - unsigned long remain; /* * We can avoid clflushing partial cachelines before the write @@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * validate up to the end of the batch. */ remain = length; - if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + if (dst_needs_clflush & CLFLUSH_BEFORE) remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false); - - while (remain) { - unsigned long sg_max = sg->length >> PAGE_SHIFT; - - for (; remain && sg_ofs < sg_max; sg_ofs++) { - unsigned long len = min(remain, PAGE_SIZE - x); - void *map; - - map = kmap_atomic(nth_page(sg_page(sg), sg_ofs)); - if (needs_clflush) - drm_clflush_virt_range(map + x, len); - memcpy(ptr, map + x, len); - kunmap_atomic(map); - - ptr += len; - remain -= len; - x = 0; - } - - sg_ofs = 0; - sg = sg_next(sg); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); + + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + x, len); + memcpy(ptr, src + x, len); + kunmap_atomic(src); + + ptr += len; + remain -= len; + x = 0; } } + i915_gem_object_finish_access(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; + return dst; } @@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, if (target_cmd_index == offset) return 0; + if (IS_ERR(jump_whitelist)) + return PTR_ERR(jump_whitelist); + if (!test_bit(target_cmd_index, jump_whitelist)) { DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", jump_target); @@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -/** - * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser() - * @batch_length: length of the commands in batch_obj - * @trampoline: Whether jump trampolines are used. - * - * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser(). - * This has to be preallocated, because the command parser runs in signaling context, - * and may not allocate any memory. - * - * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use - * IS_ERR() to check for errors. Must bre freed() with kfree(). - * - * NULL is a valid value, meaning no allocation was required. - */ -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline) +static unsigned long *alloc_whitelist(u32 batch_length) { unsigned long *jmp; - if (trampoline) - return NULL; - /* * We expect batch_length to be less than 256KiB for known users, * i.e. 
we need at most an 8KiB bitmap allocation which should be @@ -1425,21 +1426,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ + int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map) + bool trampoline) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; + unsigned long *jump_whitelist; u64 batch_addr, shadow_addr; int ret = 0; - bool trampoline = !jump_whitelist; GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); @@ -1447,8 +1448,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, batch->size)); GEM_BUG_ON(!batch_length); - cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length, - shadow_map, batch_map); + cmd = copy_batch(shadow->obj, batch->obj, + batch_offset, batch_length, + &needs_clflush_after); + if (IS_ERR(cmd)) { + DRM_DEBUG("CMD: Failed to copy batch\n"); + return PTR_ERR(cmd); + } + + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1549,6 +1560,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, i915_gem_object_flush_map(shadow->obj); + if (!IS_ERR_OR_NULL(jump_whitelist)) + kfree(jump_whitelist); + i915_gem_object_unpin_map(shadow->obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index adc7dcf042a0..d0027b0e8afc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1902,17 +1902,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline); - int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map); + bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ -- cgit v1.2.3 From 93a2711cddd5760e2f0f901817d71c93183c3b87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:16 -0500 Subject: Revert "drm/i915: Propagate errors on awaiting already signaled fences" This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever since that commit, we've been having issues where a hang in one client can propagate to another. In particular, a hang in an app can propagate to the X server which causes the whole desktop to lock up. Error propagation along fences sound like a good idea, but as your bug shows, surprising consequences, since propagating errors across security boundaries is not a good thing. 
What we do have is track the hangs on the ctx, and report information to userspace using RESET_STATS. That's how arb_robustness works. Also, if my understanding is still correct, the EIO from execbuf is when your context is banned (because not recoverable or too many hangs). And in all these cases it's up to userspace to figure out what is all impacted and should be reported to the application, that's not on the kernel to guess and automatically propagate. What's more, we're also building more features on top of ctx error reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the userspace fence wait also relies on that mechanism. So it is the path going forward for reporting gpu hangs and resets to userspace. So all together that's why I think we should just bury this idea again as not quite the direction we want to go to, hence why I think the revert is the right option here. For backporters: Please note that you _must_ have a backport of https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.net/ for otherwise backporting just this patch opens up a security bug. v2: Augment commit message. Also restore Jason's sob that I accidentally lost. v3: Add a note for backporters Signed-off-by: Jason Ekstrand Reported-by: Marcin Slusarz Cc: # v5.6+ Cc: Jason Ekstrand Cc: Marcin Slusarz Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Acked-by: Daniel Vetter Reviewed-by: Jon Bloomfield Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_request.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 86b4c9f2613d..09ebea9a0090 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1399,10 +1399,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1499,10 +1497,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along -- cgit v1.2.3 From 7d6a276e2fa9579e0fd63931a6e8388e3171cecd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:17 -0500 Subject: drm/i915: Remove allow_alloc from i915_gem_object_get_sg* This reverts the rest of 0edbb9ba1bfe ("drm/i915: Move cmd parser pinning to execbuffer"). Now that the only user of i915_gem_object_get_sg without allow_alloc has been removed, we can drop the parameter. This portion of the revert was broken into its own patch to aid review. 
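For reference, the RESET_STATS mechanism that the fence error-propagation revert above points userspace at is an ordinary ioctl; this is how arb_robustness-style clients already learn about hangs affecting their context. A hedged userspace sketch, assuming an open i915 fd and a context id, with error handling elided:

        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <drm/i915_drm.h>

        static void report_hangs(int fd, unsigned int ctx_id)
        {
                struct drm_i915_reset_stats stats = { .ctx_id = ctx_id };

                /* batch_active/batch_pending count batches lost for this context. */
                if (ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) == 0)
                        printf("ctx %u: resets=%u active=%u pending=%u\n",
                               ctx_id, stats.reset_count,
                               stats.batch_active, stats.batch_pending);
        }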
Signed-off-by: Jason Ekstrand Cc: Maarten Lankhorst Reviewed-by: Jon Bloomfield Acked-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-4-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 10 +++++----- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 20 ++++---------------- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- 4 files changed, 11 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 8be4fadeee48..f3ede43282dc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -342,22 +342,22 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset, bool allow_alloc, bool dma); + unsigned int *offset, bool dma); static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc, false); + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false); } static inline struct scatterlist * i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc, true); + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true); } struct page * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 0c9d28423d45..8eb1c3a6fc9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -494,7 +494,7 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, unsigned int *offset, - bool allow_alloc, bool dma) + bool dma) { struct scatterlist *sg; unsigned int idx, count; @@ -516,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (n < READ_ONCE(iter->sg_idx)) goto lookup; - if (!allow_alloc) - goto manual_lookup; - mutex_lock(&iter->lock); /* We prefer to reuse the last sg so that repeated lookup of this @@ -568,16 +565,7 @@ scan: if (unlikely(n < idx)) /* insertion completed by another thread */ goto lookup; - goto manual_walk; - -manual_lookup: - idx = 0; - sg = obj->mm.pages->sgl; - count = __sg_page_count(sg); - -manual_walk: - /* - * In case we failed to insert the entry into the radixtree, we need + /* In case we failed to insert the entry into the radixtree, we need * to look beyond the current sg. 
*/ while (idx + count <= n) { @@ -624,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - sg = i915_gem_object_get_sg(obj, n, &offset, true); + sg = i915_gem_object_get_sg(obj, n, &offset); return nth_page(sg_page(sg), offset); } @@ -650,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg_dma(obj, n, &offset, true); + sg = i915_gem_object_get_sg_dma(obj, n, &offset); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 6589411396d3..f253b11e9e36 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -589,7 +589,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, GEM_WARN_ON(bo->ttm); - sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true, true); + sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20e46b843324..9d445ad9a342 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -1494,7 +1494,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, if (ret) goto err_sg_alloc; - iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset, true); + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); GEM_BUG_ON(!iter); sg = st->sgl; -- cgit v1.2.3 From dc194184d0ce1ba7837f91e0af20e95923049d4d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:18 -0500 Subject: drm/i915: Drop error handling from dma_fence_work Asynchronous command parsing was the only thing which ever returned a non-zero error. With that gone, we can drop the error handling from dma_fence_work. 
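Based on the i915-internal ops structure visible in the hunks below, a hedged sketch of what a dma_fence_work user looks like once ->work() returns void; the my_flush names are hypothetical, and any failure now has to be handled inside the worker itself rather than reported through dma_fence_set_error():

        struct my_flush_work {
                struct dma_fence_work base;
                struct drm_i915_gem_object *obj;
        };

        static void my_flush_worker(struct dma_fence_work *base)
        {
                struct my_flush_work *w = container_of(base, typeof(*w), base);

                /* Deferred work runs here; it can no longer fail the fence. */
                drm_clflush_sg(w->obj->mm.pages);
        }

        static void my_flush_release(struct dma_fence_work *base)
        {
                struct my_flush_work *w = container_of(base, typeof(*w), base);

                i915_gem_object_put(w->obj);
        }

        static const struct dma_fence_work_ops my_flush_ops = {
                .name = "my_flush",
                .work = my_flush_worker,
                .release = my_flush_release,
        };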
Signed-off-by: Jason Ekstrand Reviewed-by: Jon Bloomfield Acked-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-5-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 4 +--- drivers/gpu/drm/i915/i915_sw_fence_work.c | 5 +---- drivers/gpu/drm/i915/i915_sw_fence_work.h | 2 +- drivers/gpu/drm/i915/i915_vma.c | 3 +-- 4 files changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..f0435c6feb68 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -24,13 +24,11 @@ static void __do_clflush(struct drm_i915_gem_object *obj) i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } -static int clflush_work(struct dma_fence_work *base) +static void clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); __do_clflush(clflush->obj); - - return 0; } static void clflush_release(struct dma_fence_work *base) diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c index a3a81bb8f2c3..5b33ef23d54c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -16,11 +16,8 @@ static void fence_complete(struct dma_fence_work *f) static void fence_work(struct work_struct *work) { struct dma_fence_work *f = container_of(work, typeof(*f), work); - int err; - err = f->ops->work(f); - if (err) - dma_fence_set_error(&f->dma, err); + f->ops->work(f); fence_complete(f); dma_fence_put(&f->dma); diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h index 2c409f11c5c5..d56806918d13 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.h +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h @@ -17,7 +17,7 @@ struct dma_fence_work; struct dma_fence_work_ops { const char *name; - int (*work)(struct dma_fence_work *f); + void (*work)(struct dma_fence_work *f); void (*release)(struct dma_fence_work *f); }; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0f227f28b280..5b9dce0f443b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -300,14 +300,13 @@ struct i915_vma_work { unsigned int flags; }; -static int __vma_bind(struct dma_fence_work *work) +static void __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); struct i915_vma *vma = vw->vma; vma->ops->bind_vma(vw->vm, &vw->stash, vma, vw->cache_level, vw->flags); - return 0; } static void __vma_release(struct dma_fence_work *work) -- cgit v1.2.3 From 0c6609bb20cf473f48403763aa9a9504ff95fa0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:19 -0500 Subject: Revert "drm/i915: Skip over MI_NOOP when parsing" This reverts a6c5e2aea704 ("drm/i915: Skip over MI_NOOP when parsing"). It complicates the batch parsing code a bit and increases indentation for no reason other than fast-skipping a command that userspace uses only rarely. Sure, there may be IGT tests that fill batches with NOOPs but that's not a case we should optimize for in the kernel. We should optimize for code clarity instead. 
Signed-off-by: Jason Ekstrand Reviewed-by: Jon Bloomfield Acked-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-6-jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_cmd_parser.c | 67 +++++++++++++++++----------------- 1 file changed, 34 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 00ec618d0159..322f4d5955a4 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1470,42 +1470,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, * space. Parsing should be faster in some cases this way. */ batch_end = cmd + batch_length / sizeof(*batch_end); - while (*cmd != MI_BATCH_BUFFER_END) { - u32 length = 1; - - if (*cmd != MI_NOOP) { /* MI_NOOP == 0 */ - desc = find_cmd(engine, *cmd, desc, &default_desc); - if (!desc) { - DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); - ret = -EINVAL; - break; - } + do { + u32 length; - if (desc->flags & CMD_DESC_FIXED) - length = desc->length.fixed; - else - length = (*cmd & desc->length.mask) + LENGTH_BIAS; + if (*cmd == MI_BATCH_BUFFER_END) + break; - if ((batch_end - cmd) < length) { - DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", - *cmd, - length, - batch_end - cmd); - ret = -EINVAL; - break; - } + desc = find_cmd(engine, *cmd, desc, &default_desc); + if (!desc) { + DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); + ret = -EINVAL; + break; + } - if (!check_cmd(engine, desc, cmd, length)) { - ret = -EACCES; - break; - } + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else + length = (*cmd & desc->length.mask) + LENGTH_BIAS; - if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { - ret = check_bbstart(cmd, offset, length, batch_length, - batch_addr, shadow_addr, - jump_whitelist); - break; - } + if ((batch_end - cmd) < length) { + DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", + *cmd, + length, + batch_end - cmd); + ret = -EINVAL; + break; + } + + if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; + break; + } + + if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { + ret = check_bbstart(cmd, offset, length, batch_length, + batch_addr, shadow_addr, + jump_whitelist); + break; } if (!IS_ERR_OR_NULL(jump_whitelist)) @@ -1518,7 +1519,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = -EINVAL; break; } - } + } while (1); if (trampoline) { /* -- cgit v1.2.3 From e7737b67ab46ee0eeaa0ca1958f72d86f8d8ccf6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:33 +0100 Subject: drm/i915/uapi: reject caching ioctls for discrete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a noop on DG1, and in the future when need to support other devices which let us control the coherency, then it should be an immutable creation time property for the BO. This will likely be controlled through a new gem_create_ext extension. 
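For userspace the visible effect is that both caching ioctls now fail with -ENODEV on discrete parts. As a rough consumer-side sketch (not part of this patch; fd and handle are assumed from context, libdrm's drmIoctl() and the i915 uapi header are used):

    #include <errno.h>
    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static int set_caching_cached(int fd, uint32_t handle)
    {
            struct drm_i915_gem_caching arg = {
                    .handle = handle,
                    .caching = I915_CACHING_CACHED,
            };

            if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0)
                    return 0;

            /*
             * On discrete (DG1+) the ioctl is rejected with ENODEV:
             * CPU caching was fixed when the object was created and
             * cannot be changed, so treat this as "nothing to do".
             */
            return errno == ENODEV ? 0 : -errno;
    }
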
v2: add some kernel doc for the discrete changes, and document the implicit rules Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 6 ++++++ include/uapi/drm/i915_drm.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 7d1400b13429..43004bef55cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = 0; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (!obj) { @@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, enum i915_cache_level level; int ret = 0; + if (IS_DGFX(i915)) + return -ENODEV; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e54f9efaead0..868c2ee7be60 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1395,6 +1395,35 @@ struct drm_i915_gem_busy { * ppGTT support, or if the object is used for scanout). Note that this might * require unbinding the object from the GTT first, if its current caching value * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. 
*/ struct drm_i915_gem_caching { /** -- cgit v1.2.3 From 15eb083bdb561bb4862cd04cd0523e55483e877e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Jul 2021 13:21:08 -0500 Subject: drm/i915: Correct the docs for intel_engine_cmd_parser In 93b713304188 ("drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser""), the parameters to intel_engine_cmd_parser() were altered without updating the docs, causing Fi.CI.DOCS to start failing. Fixes: 93b713304188 ("drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"") Signed-off-by: Jason Ekstrand Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210720182108.2761496-1-jason@jlekstrand.net Signed-off-by: Rodrigo Vivi [Added 'Fixes:' tag and corrected the hash for the ancestor] --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 322f4d5955a4..e0403ce9ce69 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1416,9 +1416,7 @@ static unsigned long *alloc_whitelist(u32 batch_length) * @batch_offset: byte offset in the batch at which execution starts * @batch_length: length of the commands in batch_obj * @shadow: validated copy of the batch buffer in question - * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist() - * @shadow_map: mapping to @shadow vma - * @batch_map: mapping to @batch vma + * @trampoline: true if we need to trampoline into privileged execution * * Parses the specified batch buffer looking for privilege violations as * described in the overview. -- cgit v1.2.3 From 7961c5b60f23dff5d82a523f9aeb8ebf34cf9926 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 14 Jul 2021 14:28:33 +0200 Subject: drm/i915: Add TTM offset argument to mmap. The FIXED mapping is only used for ttm, and tells userspace that the mapping type is pre-defined. This disables the other type of mmap offsets when discrete memory is used, so fix the selftests as well. Document the struct as well, so it shows up in docbook. 
Cc: Jason Ekstrand Reviewed-by: Daniel Vetter Reviewed-by: Jason Ekstrand Signed-off-by: Maarten Lankhorst [mauld: Included minor fixes from the review comments] Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210714122833.766586-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++++- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 27 ++++++++++++- include/uapi/drm/i915_drm.h | 47 +++++++++++++++++----- 4 files changed, 78 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a90f796e85c0..2f3b7dc7b0e6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -679,10 +679,16 @@ __assign_mmap_offset(struct drm_i915_gem_object *obj, return -ENODEV; if (obj->ops->mmap_offset) { + if (mmap_type != I915_MMAP_TYPE_FIXED) + return -ENODEV; + *offset = obj->ops->mmap_offset(obj); return 0; } + if (mmap_type == I915_MMAP_TYPE_FIXED) + return -ENODEV; + if (mmap_type != I915_MMAP_TYPE_GTT && !i915_gem_object_has_struct_page(obj) && !i915_gem_object_has_iomem(obj)) @@ -727,7 +733,9 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, { enum i915_mmap_type mmap_type; - if (boot_cpu_has(X86_FEATURE_PAT)) + if (HAS_LMEM(to_i915(dev))) + mmap_type = I915_MMAP_TYPE_FIXED; + else if (boot_cpu_has(X86_FEATURE_PAT)) mmap_type = I915_MMAP_TYPE_WC; else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) return -ENODEV; @@ -798,6 +806,10 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, type = I915_MMAP_TYPE_UC; break; + case I915_MMAP_OFFSET_FIXED: + type = I915_MMAP_TYPE_FIXED; + break; + default: return -EINVAL; } @@ -968,6 +980,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &vm_ops_cpu; break; + case I915_MMAP_TYPE_FIXED: + GEM_WARN_ON(1); + fallthrough; case I915_MMAP_TYPE_WB: vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &vm_ops_cpu; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index ef3de2ae9723..afbadfc5516b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -105,6 +105,7 @@ enum i915_mmap_type { I915_MMAP_TYPE_WC, I915_MMAP_TYPE_WB, I915_MMAP_TYPE_UC, + I915_MMAP_TYPE_FIXED, }; struct i915_mmap_offset { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index fda017c19ef6..0b2b73d8a364 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -573,6 +573,14 @@ err: return 0; } +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) +{ + if (HAS_LMEM(i915)) + return I915_MMAP_TYPE_FIXED; + + return I915_MMAP_TYPE_GTT; +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) @@ -585,7 +593,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); - ret = __assign_mmap_offset(obj, I915_MMAP_TYPE_GTT, &offset, NULL); + ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); i915_gem_object_put(obj); return ret == expected; @@ -689,7 +697,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - err = __assign_mmap_offset(obj, 
I915_MMAP_TYPE_GTT, &offset, NULL); + err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); if (err) { pr_err("Unable to insert object into reclaimed hole\n"); goto err_obj; @@ -831,8 +839,14 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; + if (HAS_LMEM(i915)) + return type == I915_MMAP_TYPE_FIXED; + else if (type == I915_MMAP_TYPE_FIXED) + return false; + if (type == I915_MMAP_TYPE_GTT && !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) return false; @@ -970,6 +984,8 @@ static int igt_mmap(void *arg) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -987,6 +1003,7 @@ static const char *repr_mmap_type(enum i915_mmap_type type) case I915_MMAP_TYPE_WB: return "wb"; case I915_MMAP_TYPE_WC: return "wc"; case I915_MMAP_TYPE_UC: return "uc"; + case I915_MMAP_TYPE_FIXED: return "fixed"; default: return "unknown"; } } @@ -1100,6 +1117,8 @@ static int igt_mmap_access(void *arg) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1241,6 +1260,8 @@ static int igt_mmap_gpu(void *arg) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1396,6 +1417,8 @@ static int igt_mmap_revoke(void *arg) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e20eeeca7a1c..0aea82657cdc 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -849,31 +849,56 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: + * + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. 
(Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. */ -- cgit v1.2.3 From f4fa096ad4c288d80a8ac89b5cc4861e68e8c435 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 16 Jul 2021 22:14:20 -0700 Subject: drm/i915: Fix application of WaInPlaceDecompressionHang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On SKL we've been applying this workaround on H0+ steppings, which is actually backwards; H0 is supposed to be the first stepping where the workaround is no longer needed. Flip the bounds so that the workaround applies to all steppings _before_ H0. On BXT we've been applying this workaround to all steppings, but the bspec tells us it's only needed until C0. Pre-C0 GT steppings only appeared in pre-production hardware, which we no longer support in the driver, so we can drop the workaround completely for this platform. On ICL we've been applying this workaround to all steppings, but there doesn't seem to be any indication that this workaround was ever needed for this platform (even now-deprecated page 20196 of the bspec doesn't mention it). We can go ahead and drop it. I also don't see any mention of this workaround being needed for KBL, although this may be an oversight since the workaround is needed for all steppings of CFL. I'll leave the workaround in place for KBL to be safe. 
Bspec: 14091, 33450 Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210717051426.4120328-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 7731db33c46a..76a3b5d5e9dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -838,23 +838,12 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GT_STEP(i915, STEP_H0, STEP_FOREVER)) + if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0 - 1)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } -static void -bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaInPlaceDecompressionHang:bxt */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - static void kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { @@ -942,11 +931,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { icl_wa_init_mcr(i915, wal); - /* WaInPlaceDecompressionHang:icl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - /* WaModifyGamTlbPartitioning:icl */ wa_write_clr_set(wal, GEN11_GACB_PERF_CTRL, @@ -1081,7 +1065,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) else if (IS_KABYLAKE(i915)) kbl_gt_workarounds_init(i915, wal); else if (IS_BROXTON(i915)) - bxt_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) -- cgit v1.2.3 From 131b1252e76af0ee462e31df428d6fdafee48532 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 16 Jul 2021 22:14:21 -0700 Subject: drm/i915/icl: Drop a couple unnecessary workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While doing a quick sanity check of the ICL workarounds in the driver I noticed a few things that should be updated: * There's no mention in the bspec that WaPipelineFlushCoherentLines is needed on gen11 (both the current WA database and the old, deprecated page 20196 were checked); it appears this might have just been copied from the gen9 list? Even if this were needed, it doesn't seem like this was the correct implementation anyway since the gen9 workaround is supposed to be implemented in the indirect context bb (as we do in gen8_emit_flush_coherentl3_wa() on gen8/gen9). * WaForwardProgressSoftReset does not appear in the current workaround database. The old deprecated workaround list has a note indicating the workaround was dropped in 2017, so we should be safe to drop it from the code too. While we're at it, add the formal workaround ID number to WaDisableBankHangMode (our hardware team made a transition from text-based workaround names to ID numbers partway through the development of ICL, which is why some workarounds only have names, some only have numbers, and some have both). 
Bspec: 33450 Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210717051426.4120328-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 76a3b5d5e9dc..36d972492883 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -517,7 +517,7 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - /* WaDisableBankHangMode:icl */ + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | @@ -1587,11 +1587,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3, _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); - /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); - /* * Wa_1405543622:icl * Formerly known as WaGAPZPriorityScheme @@ -1621,13 +1616,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_L3SQCREG4, GEN11_LQSC_CLEAN_EVICT_DISABLE); - /* WaForwardProgressSoftReset:icl */ - wa_write_or(wal, - GEN10_SCRATCH_LNCF2, - PMFLUSHDONE_LNICRSDROP | - PMFLUSH_GAPL3UNBLOCK | - PMFLUSHDONE_LNEBLK); - /* Wa_1606682166:icl */ wa_write_or(wal, GEN7_SARCHKMD, -- cgit v1.2.3 From 1e1824de33c1a44640778fd04106e240e819866b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 16 Jul 2021 22:14:22 -0700 Subject: drm/i915: Program DFR enable/disable as a GT workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DFR programming (which we enable as an optimization on gen11, but must ensure is disabled on gen12) should be handled as a GT workaround rather than clock gating initialization. This will ensure that the programming of these registers is verified with our typical workaround checks. Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210717051426.4120328-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++ drivers/gpu/drm/i915/intel_pm.c | 8 -------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 36d972492883..685c6115d380 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -965,6 +965,12 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + + /* + * This is not a documented workaround, but rather an optimization + * to reduce sampler power. 
+ */ + wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } /* @@ -998,6 +1004,9 @@ gen12_gt_workarounds_init(struct drm_i915_private *i915, /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */ wa_14011060649(i915, wal); + + /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ + wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7ca93bd02522..839ab94b36fe 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7340,10 +7340,6 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); - /* This is not an Wa. Enable to reduce Sampler power */ - intel_uncore_write(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, - intel_uncore_read(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); - /*Wa_14010594013:icl, ehl */ intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, 0, CNL_DELAY_PMRSP); @@ -7362,10 +7358,6 @@ static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); - /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ - intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, - 0, DFR_DISABLE); - /* Wa_14013723622:tgl,rkl,dg1,adl-s */ if (DISPLAY_VER(dev_priv) == 12) intel_uncore_rmw(&dev_priv->uncore, CLKREQ_POLICY, -- cgit v1.2.3 From 6b73a7f380a3f1a9599bc802cf78febeb77f42db Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 16 Jul 2021 22:14:26 -0700 Subject: drm/i915: Make GT workaround upper bounds exclusive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workarounds are documented in the bspec with an exclusive upper bound (i.e., a "fixed" stepping that no longer needs the workaround). This makes our driver's use of an inclusive upper bound for stepping ranges confusing; the differing notation between code and bspec makes it very easy for mistakes to creep in. Let's switch the upper bound of our IS_{GT,DISP}_STEP macros over to use an exclusive upper bound like the bspec does. This also has the benefit of helping make sure workarounds are properly handled for new minor steppings that show up (e.g., an A1 between the A0 and B0 we already knew about) --- if the new intermediate stepping pulls in hardware fixes early, there will be an update to the workaround definition which lets us know we need to change our code. If the new stepping does not pull a hardware fix earlier, then the new stepping will already be captured properly by the "[begin, fix)" range in the code. We'll probably need to be extra vigilant in code review of new workarounds for the near future to make sure developers notice the new semantics of workaround bounds. But we just migrated a bunch of our platforms from the IS_REVID bounds over to IS_{GT,DISP}_STEP, so people are already adjusting to the new macros and now is a good time to make this change too. 
[mattrope: Split out GT changes to apply through gt-next tree] Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210717051426.4120328-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 28 ++++++++++++++-------------- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 6 +++--- 5 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 87b06572fd2e..a69f5c438c72 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_B0)) + if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0)) dc_flush_wa = true; } diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 50d11a84e7a9..e3a2a2fa5f94 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -157,7 +157,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_B0)) + if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0)) return false; *start = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 685c6115d380..247f0331ebee 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -838,7 +838,7 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0 - 1)) + if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); @@ -850,7 +850,7 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen9_gt_workarounds_init(i915, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(i915, 0, STEP_B0)) + if (IS_KBL_GT_STEP(i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -961,7 +961,7 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_A0)) + IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1015,19 +1015,19 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, 
UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL); } @@ -1038,7 +1038,7 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1607087056:dg1 */ - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1436,7 +1436,7 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_A0) && + if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), @@ -1504,8 +1504,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0) || - IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] * Wa_1607063988:tgl[a0],dg1[a0] @@ -1515,7 +1515,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) @@ -1550,7 +1550,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0) || + IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, @@ -1564,7 +1564,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } - if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_A0) || + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl @@ -1925,7 +1925,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_E0)) { + if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d0027b0e8afc..3ee350129f36 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1339,7 +1339,7 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define IS_GT_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \ - INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) <= (until)) + INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until)) static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 839ab94b36fe..0e5d8a88f537 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7377,7 +7377,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) gen12lp_init_clock_gating(dev_priv); /* Wa_1409836686:dg1[a0] */ - if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0)) 
intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | DPT_GATING_DIS); } @@ -7462,12 +7462,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) FBC_LLC_FULLY_OPEN); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_B0)) + if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_B0)) + if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); -- cgit v1.2.3 From 4f62a7e0d3338771261a945ceb87182e911a4f71 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 21 Jul 2021 20:32:29 +0200 Subject: drm/i915: Ditch i915 globals shrink infrastructure This essentially reverts commit 84a1074920523430f9dc30ff907f4801b4820072 Author: Chris Wilson Date: Wed Jan 24 11:36:08 2018 +0000 drm/i915: Shrink the GEM kmem_caches upon idling mm/vmscan.c:do_shrink_slab() is a thing, if there's an issue with it then we need to fix that there, not hand-roll our own slab shrinking code in i915. Also when this was added there was only one other caller of kmem_cache_shrink (added 2005 to the acpi code). Now there's a 2nd one outside of i915 code in a kunit test, which seems legit since that wants to very carefully control what's in the kmem_cache. This out of a total of over 500 calls to kmem_cache_create. This alone should have been warning sign enough that we're doing something silly. Noticed while reviewing a patch set from Jason to fix up some issues in our i915_init() and i915_exit() module load/cleanup code. Now that i915_globals.c isn't any different than normal init/exit functions, we should convert them over to one unified table and remove i915_globals.[hc] entirely. 
v2: Improve commit message (Jason) Reviewed-by: Jason Ekstrand Cc: David Airlie Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721183229.4136488-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 -- drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 -- drivers/gpu/drm/i915/gt/intel_context.c | 6 -- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 -- drivers/gpu/drm/i915/i915_active.c | 6 -- drivers/gpu/drm/i915/i915_globals.c | 95 ----------------------------- drivers/gpu/drm/i915/i915_globals.h | 3 - drivers/gpu/drm/i915/i915_request.c | 7 --- drivers/gpu/drm/i915/i915_scheduler.c | 7 --- drivers/gpu/drm/i915/i915_vma.c | 6 -- 10 files changed, 146 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7d6f52d8a801..bf2a2319353a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2280,18 +2280,12 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) #include "selftests/i915_gem_context.c" #endif -static void i915_global_gem_context_shrink(void) -{ - kmem_cache_shrink(global.slab_luts); -} - static void i915_global_gem_context_exit(void) { kmem_cache_destroy(global.slab_luts); } static struct i915_global_gem_context global = { { - .shrink = i915_global_gem_context_shrink, .exit = i915_global_gem_context_exit, } }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 9da7b288b7ed..5c21cff33199 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -664,18 +664,12 @@ void i915_gem_init__objects(struct drm_i915_private *i915) INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); } -static void i915_global_objects_shrink(void) -{ - kmem_cache_shrink(global.slab_objects); -} - static void i915_global_objects_exit(void) { kmem_cache_destroy(global.slab_objects); } static struct i915_global_object global = { { - .shrink = i915_global_objects_shrink, .exit = i915_global_objects_exit, } }; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index bd63813c8a80..c1338441cc1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -398,18 +398,12 @@ void intel_context_fini(struct intel_context *ce) i915_active_fini(&ce->active); } -static void i915_global_context_shrink(void) -{ - kmem_cache_shrink(global.slab_ce); -} - static void i915_global_context_exit(void) { kmem_cache_destroy(global.slab_ce); } static struct i915_global_context global = { { - .shrink = i915_global_context_shrink, .exit = i915_global_context_exit, } }; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index aef3084e8b16..d86825437516 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -67,8 +67,6 @@ static int __gt_unpark(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); - i915_globals_unpark(); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -116,8 +114,6 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); - i915_globals_park(); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 
b1aa1c482c32..91723123ae9f 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -1176,18 +1176,12 @@ struct i915_active *i915_active_create(void) #include "selftests/i915_active.c" #endif -static void i915_global_active_shrink(void) -{ - kmem_cache_shrink(global.slab_cache); -} - static void i915_global_active_exit(void) { kmem_cache_destroy(global.slab_cache); } static struct i915_global_active global = { { - .shrink = i915_global_active_shrink, .exit = i915_global_active_exit, } }; diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 77f1911c463b..7fe2e503897b 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -17,61 +17,8 @@ static LIST_HEAD(globals); -static atomic_t active; -static atomic_t epoch; -static struct park_work { - struct delayed_work work; - struct rcu_head rcu; - unsigned long flags; -#define PENDING 0 - int epoch; -} park; - -static void i915_globals_shrink(void) -{ - struct i915_global *global; - - /* - * kmem_cache_shrink() discards empty slabs and reorders partially - * filled slabs to prioritise allocating from the mostly full slabs, - * with the aim of reducing fragmentation. - */ - list_for_each_entry(global, &globals, link) - global->shrink(); -} - -static void __i915_globals_grace(struct rcu_head *rcu) -{ - /* Ratelimit parking as shrinking is quite slow */ - schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); -} - -static void __i915_globals_queue_rcu(void) -{ - park.epoch = atomic_inc_return(&epoch); - if (!atomic_read(&active)) { - init_rcu_head(&park.rcu); - call_rcu(&park.rcu, __i915_globals_grace); - } -} - -static void __i915_globals_park(struct work_struct *work) -{ - destroy_rcu_head(&park.rcu); - - /* Confirm nothing woke up in the last grace period */ - if (park.epoch != atomic_read(&epoch)) { - __i915_globals_queue_rcu(); - return; - } - - clear_bit(PENDING, &park.flags); - i915_globals_shrink(); -} - void __init i915_global_register(struct i915_global *global) { - GEM_BUG_ON(!global->shrink); GEM_BUG_ON(!global->exit); list_add_tail(&global->link, &globals); @@ -109,52 +56,10 @@ int __init i915_globals_init(void) } } - INIT_DELAYED_WORK(&park.work, __i915_globals_park); return 0; } -void i915_globals_park(void) -{ - /* - * Defer shrinking the global slab caches (and other work) until - * after a RCU grace period has completed with no activity. This - * is to try and reduce the latency impact on the consumers caused - * by us shrinking the caches the same time as they are trying to - * allocate, with the assumption being that if we idle long enough - * for an RCU grace period to elapse since the last use, it is likely - * to be longer until we need the caches again. 
- */ - if (!atomic_dec_and_test(&active)) - return; - - /* Queue cleanup after the next RCU grace period has freed slabs */ - if (!test_and_set_bit(PENDING, &park.flags)) - __i915_globals_queue_rcu(); -} - -void i915_globals_unpark(void) -{ - atomic_inc(&epoch); - atomic_inc(&active); -} - -static void __exit __i915_globals_flush(void) -{ - atomic_inc(&active); /* skip shrinking */ - - rcu_barrier(); /* wait for the work to be queued */ - flush_delayed_work(&park.work); - - atomic_dec(&active); -} - void __exit i915_globals_exit(void) { - GEM_BUG_ON(atomic_read(&active)); - - __i915_globals_flush(); __i915_globals_cleanup(); - - /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ - rcu_barrier(); } diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index 2d199f411a4a..9e6b4fd07528 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -14,15 +14,12 @@ typedef void (*i915_global_func_t)(void); struct i915_global { struct list_head link; - i915_global_func_t shrink; i915_global_func_t exit; }; void i915_global_register(struct i915_global *global); int i915_globals_init(void); -void i915_globals_park(void); -void i915_globals_unpark(void); void i915_globals_exit(void); /* constructors */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 09ebea9a0090..d3de9f60e03a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -2077,12 +2077,6 @@ void i915_request_show(struct drm_printer *m, #include "selftests/i915_request.c" #endif -static void i915_global_request_shrink(void) -{ - kmem_cache_shrink(global.slab_execute_cbs); - kmem_cache_shrink(global.slab_requests); -} - static void i915_global_request_exit(void) { kmem_cache_destroy(global.slab_execute_cbs); @@ -2090,7 +2084,6 @@ static void i915_global_request_exit(void) } static struct i915_global_request global = { { - .shrink = i915_global_request_shrink, .exit = i915_global_request_exit, } }; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 3a58a9130309..561c649e59f7 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -475,12 +475,6 @@ i915_sched_engine_create(unsigned int subclass) return sched_engine; } -static void i915_global_scheduler_shrink(void) -{ - kmem_cache_shrink(global.slab_dependencies); - kmem_cache_shrink(global.slab_priorities); -} - static void i915_global_scheduler_exit(void) { kmem_cache_destroy(global.slab_dependencies); @@ -488,7 +482,6 @@ static void i915_global_scheduler_exit(void) } static struct i915_global_scheduler global = { { - .shrink = i915_global_scheduler_shrink, .exit = i915_global_scheduler_exit, } }; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b9dce0f443b..09a7c47926f7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1414,18 +1414,12 @@ void i915_vma_make_purgeable(struct i915_vma *vma) #include "selftests/i915_vma.c" #endif -static void i915_global_vma_shrink(void) -{ - kmem_cache_shrink(global.slab_vmas); -} - static void i915_global_vma_exit(void) { kmem_cache_destroy(global.slab_vmas); } static struct i915_global_vma global = { { - .shrink = i915_global_vma_shrink, .exit = i915_global_vma_exit, } }; -- cgit v1.2.3 From 75d3bf84dfca2fd3f83125eb68f0f55c7018d4de Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:53 -0500 Subject: drm/i915: 
Call i915_globals_exit() after i915_pmu_exit() We should tear down in the opposite order we set up. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Reviewed-by: Tvrtko Ursulin Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-2-jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 866ba136794c..2cdace47610b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1209,8 +1209,8 @@ static void __exit i915_exit(void) i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); - i915_globals_exit(); i915_pmu_exit(); + i915_globals_exit(); } module_init(i915_init); -- cgit v1.2.3 From db484889d1ff0645e07e360d3e3ad306c0515821 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:54 -0500 Subject: drm/i915: Call i915_globals_exit() if pci_register_device() fails In the unlikely event that pci_register_device() fails, we were tearing down our PMU setup but not globals. This leaves a bunch of memory slabs lying around. Signed-off-by: Jason Ekstrand Fixes: 32eb6bcfdda9 ("drm/i915: Make request allocation caches global") [danvet: Fix conflicts against removal of the globals_flush infrastructure.] Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-3-jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_globals.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 7fe2e503897b..0a1b8920a3ba 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -59,7 +59,7 @@ int __init i915_globals_init(void) return 0; } -void __exit i915_globals_exit(void) +void i915_globals_exit(void) { __i915_globals_cleanup(); } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2cdace47610b..ef1ddb1ebaf1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1195,6 +1195,7 @@ static int __init i915_init(void) err = pci_register_driver(&i915_pci_driver); if (err) { i915_pmu_exit(); + i915_globals_exit(); return err; } -- cgit v1.2.3 From a04ea6ae7c6728cd834709f3477e75d4f74583da Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:55 -0500 Subject: drm/i915: Use a table for i915_init/exit (v2) If the driver was not fully loaded, we may still have globals lying around. If we don't tear those down in i915_exit(), we'll leak a bunch of memory slabs. This can happen two ways: use_kms = false and if we've run mock selftests. In either case, we have an early exit from i915_init which happens after i915_globals_init() and we need to clean up those globals. The mock selftests case is especially sticky. The load isn't entirely a no-op. We actually do quite a bit inside those selftests including allocating a bunch of mock objects and running tests on them. Once all those tests are complete, we exit early from i915_init(). Perviously, i915_init() would return a non-zero error code on failure and a zero error code on success. In the success case, we would get to i915_exit() and check i915_pci_driver.driver.owner to detect if i915_init exited early and do nothing. 
In the failure case, we would fail i915_init() but there would be no opportunity to clean up globals. The most annoying part is that you don't actually notice the failure as part of the self-tests since leaking a bit of memory, while bad, doesn't result in anything observable from userspace. Instead, the next time we load the driver (usually for next IGT test), i915_globals_init() gets invoked again, we go to allocate a bunch of new memory slabs, those implicitly create debugfs entries, and debugfs warns that we're trying to create directories and files that already exist. Since this all happens as part of the next driver load, it shows up in the dmesg-warn of whatever IGT test ran after the mock selftests. While the obvious thing to do here might be to call i915_globals_exit() after selftests, that's not actually safe. The dma-buf selftests call i915_gem_prime_export which creates a file. We call dma_buf_put() on the resulting dmabuf which calls fput() on the file. However, fput() isn't immediate and gets flushed right before syscall returns. This means that all the fput()s from the selftests don't happen until right before the module load syscall used to fire off the selftests returns which is after i915_init(). If we call i915_globals_exit() in i915_init() after selftests, we end up freeing slabs out from under objects which won't get released until fput() is flushed at the end of the module load syscall. The solution here is to let i915_init() return success early and detect the early success in i915_exit() and only tear down globals and nothing else. This way the module loads successfully, regardless of the success or failure of the tests. Because we've not enumerated any PCI devices, no device nodes are created and it's entirely useless from userspace. The only thing the module does at that point is hold on to a bit of memory until we unload it and i915_exit() is called. Importantly, this means that everything from our selftests has the ability to properly flush out between i915_init() and i915_exit() because there is at least one syscall boundary in between. In order to handle all the delicate init/exit cases, we convert the whole thing to a table of init/exit pairs and track the init status in the new init_progress global. This allows us to ensure that i915_exit() always tears down exactly the things that i915_init() successfully initialized. We also allow early-exit of i915_init() without failure by an init function returning > 0. This is useful for nomodeset, and selftests. For the mock selftests, we convert them to always return 1 so we get the desired behavior of the driver always succeeding to load the driver and then properly tearing down the partially loaded driver. 
v2 (Tvrtko Ursulin): - Guard init_funcs[i].exit with GEM_BUG_ON(i >= ARRAY_SIZE(init_funcs)) v2 (Daniel Vetter): - Update the docstring for i915.mock_selftests Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Cc: Tvrtko Ursulin Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-4-jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_pci.c | 105 +++++++++++++++++-------- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/i915_perf.h | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 4 +- drivers/gpu/drm/i915/i915_pmu.h | 4 +- drivers/gpu/drm/i915/selftests/i915_selftest.c | 4 +- 6 files changed, 82 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ef1ddb1ebaf1..e18e267dcd5f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1150,27 +1150,9 @@ static void i915_pci_shutdown(struct pci_dev *pdev) i915_driver_shutdown(i915); } -static struct pci_driver i915_pci_driver = { - .name = DRIVER_NAME, - .id_table = pciidlist, - .probe = i915_pci_probe, - .remove = i915_pci_remove, - .shutdown = i915_pci_shutdown, - .driver.pm = &i915_pm_ops, -}; - -static int __init i915_init(void) +static int i915_check_nomodeset(void) { bool use_kms = true; - int err; - - err = i915_globals_init(); - if (err) - return err; - - err = i915_mock_selftests(); - if (err) - return err > 0 ? 0 : err; /* * Enable KMS by default, unless explicitly overriden by @@ -1187,31 +1169,88 @@ static int __init i915_init(void) if (!use_kms) { /* Silently fail loading to not upset userspace. */ DRM_DEBUG_DRIVER("KMS disabled.\n"); - return 0; + return 1; } - i915_pmu_init(); + return 0; +} - err = pci_register_driver(&i915_pci_driver); - if (err) { - i915_pmu_exit(); - i915_globals_exit(); - return err; +static struct pci_driver i915_pci_driver = { + .name = DRIVER_NAME, + .id_table = pciidlist, + .probe = i915_pci_probe, + .remove = i915_pci_remove, + .shutdown = i915_pci_shutdown, + .driver.pm = &i915_pm_ops, +}; + +static int i915_register_pci_driver(void) +{ + return pci_register_driver(&i915_pci_driver); +} + +static void i915_unregister_pci_driver(void) +{ + pci_unregister_driver(&i915_pci_driver); +} + +static const struct { + int (*init)(void); + void (*exit)(void); +} init_funcs[] = { + { i915_globals_init, i915_globals_exit }, + { i915_mock_selftests, NULL }, + { i915_check_nomodeset, NULL }, + { i915_pmu_init, i915_pmu_exit }, + { i915_register_pci_driver, i915_unregister_pci_driver }, + { i915_perf_sysctl_register, i915_perf_sysctl_unregister }, +}; +static int init_progress; + +static int __init i915_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err < 0) { + while (i--) { + if (init_funcs[i].exit) + init_funcs[i].exit(); + } + return err; + } else if (err > 0) { + /* + * Early-exit success is reserved for things which + * don't have an exit() function because we have no + * idea how far they got or how to partially tear + * them down. + */ + WARN_ON(init_funcs[i].exit); + + /* + * We don't want to advertise devices with an only + * partially initialized driver. 
+ */ + WARN_ON(i915_pci_driver.driver.owner); + break; + } } - i915_perf_sysctl_register(); + init_progress = i; + return 0; } static void __exit i915_exit(void) { - if (!i915_pci_driver.driver.owner) - return; + int i; - i915_perf_sysctl_unregister(); - pci_unregister_driver(&i915_pci_driver); - i915_pmu_exit(); - i915_globals_exit(); + for (i = init_progress - 1; i >= 0; i--) { + GEM_BUG_ON(i >= ARRAY_SIZE(init_funcs)); + if (init_funcs[i].exit) + init_funcs[i].exit(); + } } module_init(i915_init); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b4ec114a4698..48ddb363b3bd 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4483,9 +4483,10 @@ static int destroy_config(int id, void *p, void *data) return 0; } -void i915_perf_sysctl_register(void) +int i915_perf_sysctl_register(void) { sysctl_header = register_sysctl_table(dev_root); + return 0; } void i915_perf_sysctl_unregister(void) diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 882fdd0a7680..1d1329e5af3a 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,7 +23,7 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); -void i915_perf_sysctl_register(void); +int i915_perf_sysctl_register(void); void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 34d37d46a126..eca92076f31d 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1088,7 +1088,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; -void i915_pmu_init(void) +int i915_pmu_init(void) { int ret; @@ -1101,6 +1101,8 @@ void i915_pmu_init(void) ret); else cpuhp_slot = ret; + + return 0; } void i915_pmu_exit(void) diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 60f9595f902c..449057648f39 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -147,14 +147,14 @@ struct i915_pmu { }; #ifdef CONFIG_PERF_EVENTS -void i915_pmu_init(void); +int i915_pmu_init(void); void i915_pmu_exit(void); void i915_pmu_register(struct drm_i915_private *i915); void i915_pmu_unregister(struct drm_i915_private *i915); void i915_pmu_gt_parked(struct drm_i915_private *i915); void i915_pmu_gt_unparked(struct drm_i915_private *i915); #else -static inline void i915_pmu_init(void) {} +static inline int i915_pmu_init(void) { return 0; } static inline void i915_pmu_exit(void) {} static inline void i915_pmu_register(struct drm_i915_private *i915) {} static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 1bc11c09faef..484759c9409c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -187,7 +187,7 @@ int i915_mock_selftests(void) err = run_selftests(mock, NULL); if (err) { i915_selftest.mock = err; - return err; + return 1; } if (i915_selftest.mock < 0) { @@ -430,7 +430,7 @@ module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); module_param_named(st_filter, i915_selftest.filter, charp, 0400); 
module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); -MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then leave dummy module)"); module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)");
-- cgit v1.2.3 From 0f4651359a235a702b383076fc2ccbd90d9bedb4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:58 -0500 Subject: drm/i915: Make the kmem slab for i915_buddy_block a global MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
There's no reason that I can tell why this should be per-i915_buddy_mm, and doing so causes KMEM_CACHE to throw dmesg warnings because it tries to create a debugfs entry with the name i915_buddy_block multiple times. We could handle this by carefully giving each slab its own name, but that brings its own pain because then we have to store that string somewhere and manage the lifetimes of the different slabs. The most likely outcome would be a global atomic which we increment to get a new name, or something like that.
The much easier solution is to use the i915_globals system like we do for every other slab in i915. This ensures that we have exactly one of them for each i915 driver load, and it gets neatly created on module load and destroyed on module unload. Using the globals system also means that it's now tied into the shrink handler, so we can properly respond to low-memory situations.
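For reference, a minimal sketch of the "one slab per module load" pattern the message describes; the actual hookup goes through the i915_globals table as the diff below shows, and the struct definition is assumed to come from i915_buddy.h.

/* Illustrative sketch; assumes struct i915_buddy_block from i915_buddy.h. */
static struct kmem_cache *slab_blocks;	/* one cache shared by all i915_buddy_mm */

static int __init example_buddy_slab_init(void)
{
	/*
	 * KMEM_CACHE() derives the cache name (and hence the debugfs/sysfs
	 * entry) from the struct name, so creating it once per module load
	 * avoids the duplicate "i915_buddy_block" warnings described above.
	 */
	slab_blocks = KMEM_CACHE(i915_buddy_block, 0);
	return slab_blocks ? 0 : -ENOMEM;
}

static void example_buddy_slab_exit(void)
{
	kmem_cache_destroy(slab_blocks);	/* on module unload */
}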
Signed-off-by: Jason Ekstrand Fixes: 88be9a0a06b7 ("drm/i915/ttm: add ttm_buddy_man") Reviewed-by: Matthew Auld Cc: Matthew Auld Cc: Christian König [danvet: Rebase against removal of global shrink code] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-7-jason@jlekstrand.net --- drivers/gpu/drm/i915/i915_buddy.c | 38 +++++++++++++++++++++++++++---------- drivers/gpu/drm/i915/i915_buddy.h | 3 ++- drivers/gpu/drm/i915/i915_globals.c | 2 ++ 3 files changed, 32 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index 29dd7d0310c1..caabcaea3be7 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -8,8 +8,14 @@ #include "i915_buddy.h" #include "i915_gem.h" +#include "i915_globals.h" #include "i915_utils.h" +static struct i915_global_buddy { + struct i915_global base; + struct kmem_cache *slab_blocks; +} global; + static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, struct i915_buddy_block *parent, unsigned int order, @@ -19,7 +25,7 @@ static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); - block = kmem_cache_zalloc(mm->slab_blocks, GFP_KERNEL); + block = kmem_cache_zalloc(global.slab_blocks, GFP_KERNEL); if (!block) return NULL; @@ -34,7 +40,7 @@ static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, static void i915_block_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block) { - kmem_cache_free(mm->slab_blocks, block); + kmem_cache_free(global.slab_blocks, block); } static void mark_allocated(struct i915_buddy_block *block) @@ -85,15 +91,11 @@ int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) GEM_BUG_ON(mm->max_order > I915_BUDDY_MAX_ORDER); - mm->slab_blocks = KMEM_CACHE(i915_buddy_block, SLAB_HWCACHE_ALIGN); - if (!mm->slab_blocks) - return -ENOMEM; - mm->free_list = kmalloc_array(mm->max_order + 1, sizeof(struct list_head), GFP_KERNEL); if (!mm->free_list) - goto out_destroy_slab; + return -ENOMEM; for (i = 0; i <= mm->max_order; ++i) INIT_LIST_HEAD(&mm->free_list[i]); @@ -145,8 +147,6 @@ out_free_roots: kfree(mm->roots); out_free_list: kfree(mm->free_list); -out_destroy_slab: - kmem_cache_destroy(mm->slab_blocks); return -ENOMEM; } @@ -161,7 +161,6 @@ void i915_buddy_fini(struct i915_buddy_mm *mm) kfree(mm->roots); kfree(mm->free_list); - kmem_cache_destroy(mm->slab_blocks); } static int split_block(struct i915_buddy_mm *mm, @@ -410,3 +409,22 @@ err_free: #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_buddy.c" #endif + +static void i915_global_buddy_exit(void) +{ + kmem_cache_destroy(global.slab_blocks); +} + +static struct i915_global_buddy global = { { + .exit = i915_global_buddy_exit, +} }; + +int __init i915_global_buddy_init(void) +{ + global.slab_blocks = KMEM_CACHE(i915_buddy_block, 0); + if (!global.slab_blocks) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h index 37f8c42071d1..d8f26706de52 100644 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -47,7 +47,6 @@ struct i915_buddy_block { * i915_buddy_alloc* and i915_buddy_free* should suffice. */ struct i915_buddy_mm { - struct kmem_cache *slab_blocks; /* Maintain a free list for each order. 
*/ struct list_head *free_list; @@ -130,4 +129,6 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); +int i915_global_buddy_init(void); + #endif diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 0a1b8920a3ba..91198f5b0a06 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -8,6 +8,7 @@ #include #include "i915_active.h" +#include "i915_buddy.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" #include "i915_globals.h" @@ -34,6 +35,7 @@ static void __i915_globals_cleanup(void) static __initconst int (* const initfn[])(void) = { i915_global_active_init, + i915_global_buddy_init, i915_global_context_init, i915_global_gem_context_init, i915_global_objects_init, -- cgit v1.2.3 From d1fbcbbc8cb4f8871f898c32ae041d5102a28ec2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 7 Jul 2021 16:59:20 -0700 Subject: drm/i915: do not abbreviate version in debugfs Brevity is not needed here, so just spell out "* version" in the string. Suggested-by: Chris Wilson Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210707235921.2416911-1-lucas.demarchi@intel.com (cherry picked from commit 0f9b145a0a0ab0d3d4143c20e2ca347d8a105e33) Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/intel_device_info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7eaa92fee421..3daf0cd8d48b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -96,9 +96,9 @@ static const char *iommu_name(void) void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - drm_printf(p, "graphics_ver: %u\n", info->graphics_ver); - drm_printf(p, "media_ver: %u\n", info->media_ver); - drm_printf(p, "display_ver: %u\n", info->display.ver); + drm_printf(p, "graphics version: %u\n", info->graphics_ver); + drm_printf(p, "media version: %u\n", info->media_ver); + drm_printf(p, "display version: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); -- cgit v1.2.3 From f39730350dd126e3e5312214b458a7ded44e9d91 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 7 Jul 2021 16:59:21 -0700 Subject: drm/i915: Add release id version Besides the arch version returned by GRAPHICS_VER(), new platforms contain a "release id" to make clear the difference from one platform to another. The release id number is not formally defined by hardware until future platforms that will expose it via a new GMD_ID register. For the platforms we support before that register becomes available we will set the values in software and we can set them as we please. So the plan is to set them so we can group different features under a single GRAPHICS_VER_FULL() check. After GMD_ID is used, the usefulness of a "full version check" will be greatly reduced and will be mostly used for deciding workarounds and a few code paths. So it makes sense to keep it as a separate field from graphics_ver. 
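To make the packing concrete, a hypothetical illustration (EXAMPLE_IP_VER is an invented name and static_assert is assumed from <linux/build_bug.h>; the real macro, IP_VER(), appears in the i915_drv.h hunk below): putting the release number in the low bits means ordinary integer comparisons order full versions correctly.

/* Illustrative only; mirrors the ver/rel packing described above. */
#define EXAMPLE_IP_VER(ver, rel)	((ver) << 8 | (rel))

/* a 12.50-style platform sorts after plain 12 and before 13 */
static_assert(EXAMPLE_IP_VER(12, 50) > EXAMPLE_IP_VER(12, 0));
static_assert(EXAMPLE_IP_VER(12, 50) < EXAMPLE_IP_VER(13, 0));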
Also, as a platform with `release == n` may be closer feature-wise to `n - 2` than to `n - 1`, use the word "release" rather than the more common "minor" for this This is a mix of 2 independent changes: one by me and the other by Matt Roper. v2: - Reword commit message to make it clearer why we don't call it "minor" (Matt Roper and Tvrtko) - Rename variables s/*_ver_release/*_rel/ and print them in a single line formatted as {ver}.{rel:2} (Jani and Matt Roper) Cc: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210707235921.2416911-2-lucas.demarchi@intel.com (cherry picked from commit ca6374e267e2735fe382fe95de2a8a9c30c6bdb3) Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/intel_device_info.c | 12 ++++++++++-- drivers/gpu/drm/i915/intel_device_info.h | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c6d245367b03..858da206a9f4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1252,11 +1252,17 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) */ #define IS_GEN(dev_priv, n) (GRAPHICS_VER(dev_priv) == (n)) +#define IP_VER(ver, rel) ((ver) << 8 | (rel)) + #define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) +#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \ + INTEL_INFO(i915)->graphics_rel) #define IS_GRAPHICS_VER(i915, from, until) \ (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver) +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \ + INTEL_INFO(i915)->media_rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 3daf0cd8d48b..d2a514d2551d 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -96,8 +96,16 @@ static const char *iommu_name(void) void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - drm_printf(p, "graphics version: %u\n", info->graphics_ver); - drm_printf(p, "media version: %u\n", info->media_ver); + if (info->graphics_rel) + drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel); + else + drm_printf(p, "graphics version: %u\n", info->graphics_ver); + + if (info->media_rel) + drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel); + else + drm_printf(p, "media version: %u\n", info->media_ver); + drm_printf(p, "display version: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "iommu: %s\n", iommu_name()); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b326aff65cd6..301bd8ba161a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -162,7 +162,9 @@ enum intel_ppgtt_type { struct intel_device_info { u8 graphics_ver; + u8 graphics_rel; u8 media_ver; + u8 media_rel; u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ -- cgit v1.2.3 From 05eb46384ecb19f020971da02e4605e74b3d920b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 21 Jul 2021 15:30:26 -0700 
Subject: drm/i915: Add XE_HP initial definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
Our _FEATURES macros went all the way back to GEN7, each one extending the previous, making it difficult to grasp what was really enabled/disabled. Take the opportunity of the GEN -> XE_HP name break and also break with the feature inheritance. For XE_HP this basically goes from GEN12 back to GEN7, coalescing the features while making sure the overrides remain, removing all the display-specific features, and sorting the result. Then also remove the definitions that would be overridden by DGFX_FEATURES and those that were 0 (since that is the default). The exception here is has_master_unit_irq: although it is a feature that started with DG1 and is true for all DGFX platforms, it's also true for XE_HP in general.
Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/gpu')
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83b500bb170c..0625109af319 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -963,6 +963,30 @@ static const struct intel_device_info adl_p_info = { }; #undef GEN + +#define XE_HP_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + +#define XE_HP_FEATURES \ + .graphics_ver = 12, \ + .graphics_rel = 50, \ + XE_HP_PAGE_SIZES, \ + .dma_mask_size = 46, \ + .has_64bit_reloc = 1, \ + .has_global_mocs = 1, \ + .has_gt_uc = 1, \ + .has_llc = 1, \ + .has_logical_ring_contexts = 1, \ + .has_logical_ring_elsq = 1, \ + .has_rc6 = 1, \ + .has_reset_engine = 1, \ + .has_rps = 1, \ + .has_runtime_pm = 1, \ + .ppgtt_size = 48, \ + .ppgtt_type = INTEL_PPGTT_FULL + #undef PLATFORM /*
-- cgit v1.2.3 From 086df54e20bec27ccc4a1df926039faf44e1037d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 21 Jul 2021 15:30:27 -0700 Subject: drm/i915/xehpsdv: add initial XeHP SDV definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
XeHP SDV is an Intel® dGPU without display. This is just the definition of some basic platform macros, by and large a copy of the current state of Tigerlake, which does not reflect the end state of this platform.
v2: - Switch to intel_step infrastructure for stepping matches. (Jani) v3: - Bring earlier in patch series and leave addition of new media engines to the engine mask for a later patch.
Bspec: 44467, 48077 Cc: Rodrigo Vivi Signed-off-by: Lucas De Marchi Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: José Roberto de Souza Signed-off-by: Stuart Summers Signed-off-by: Tomas Winkler Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ drivers/gpu/drm/i915/i915_pci.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_step.c | 12 +++++++++++- drivers/gpu/drm/i915/intel_step.h | 1 + 6 files changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 858da206a9f4..567e8b9fdb87 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1391,6 +1391,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_S) #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) +#define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -1501,6 +1502,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_ALDERLAKE_P(__i915) && \ IS_GT_STEP(__i915, since, until)) +#define IS_XEHPSDV_GT_STEP(p, since, until) \ + (IS_XEHPSDV(p) && IS_GT_STEP(__i915, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 0625109af319..0f8f14e405a1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -987,6 +987,25 @@ static const struct intel_device_info adl_p_info = { .ppgtt_size = 48, \ .ppgtt_type = INTEL_PPGTT_FULL +#define XE_HPM_FEATURES \ + .media_ver = 12, \ + .media_rel = 50 + +__maybe_unused +static const struct intel_device_info xehpsdv_info = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + PLATFORM(INTEL_XEHPSDV), + .display = { }, + .pipe_mask = 0, + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | + BIT(VECS0) | BIT(VECS1) | + BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3), + .require_force_probe = 1, +}; + #undef PLATFORM /* diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d2a514d2551d..b750f9ded9d5 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -68,6 +68,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(DG1), PLATFORM_NAME(ALDERLAKE_S), PLATFORM_NAME(ALDERLAKE_P), + PLATFORM_NAME(XEHPSDV), }; #undef PLATFORM_NAME diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 301bd8ba161a..4482a73a5816 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -88,6 +88,7 @@ enum intel_platform { INTEL_DG1, INTEL_ALDERLAKE_S, INTEL_ALDERLAKE_P, + INTEL_XEHPSDV, INTEL_MAX_PLATFORMS }; diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 9fcf17708cc8..b99829bcb4f7 100644 --- 
a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -101,6 +101,13 @@ static const struct intel_step_info adlp_revids[] = { [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, }; +static const struct intel_step_info xehpsdv_revids[] = { + [0x0] = { .gt_step = STEP_A0 }, + [0x1] = { .gt_step = STEP_A1 }, + [0x4] = { .gt_step = STEP_B0 }, + [0x8] = { .gt_step = STEP_C0 }, +}; + void intel_step_init(struct drm_i915_private *i915) { const struct intel_step_info *revids = NULL; @@ -108,7 +115,10 @@ void intel_step_init(struct drm_i915_private *i915) int revid = INTEL_REVID(i915); struct intel_step_info step = {}; - if (IS_ALDERLAKE_P(i915)) { + if (IS_XEHPSDV(i915)) { + revids = xehpsdv_revids; + size = ARRAY_SIZE(xehpsdv_revids); + } else if (IS_ALDERLAKE_P(i915)) { revids = adlp_revids; size = ARRAY_SIZE(adlp_revids); } else if (IS_ALDERLAKE_S(i915)) { diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index 88a77159703e..41567d9b7c35 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -22,6 +22,7 @@ struct intel_step_info { enum intel_step { STEP_NONE = 0, STEP_A0, + STEP_A1, STEP_A2, STEP_B0, STEP_B1, -- cgit v1.2.3 From 9e22cfc5e9b92556a56d8a564cdab31045f29010 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 21 Jul 2021 15:30:28 -0700 Subject: drm/i915/dg2: add DG2 platform info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DG2 has Xe_LPD display (version 13) and Xe_HPG (version 12.55) graphics. There are two variants (treated as subplatforms in the code): DG2-G10 and DG2-G11 that require independent programming in some areas (e.g., workarounds). Bspec: 44472, 44474, 46197, 48028, 48077 Cc: Anusha Srivatsa Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_pci.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 5 +++++ drivers/gpu/drm/i915/intel_step.c | 20 +++++++++++++++++++- 5 files changed, 68 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 567e8b9fdb87..d7f46136b1ae 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1392,6 +1392,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_S) #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) #define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) +#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) +#define IS_DG2_G10(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) +#define IS_DG2_G11(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -1505,6 +1510,28 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_XEHPSDV_GT_STEP(p, since, until) \ (IS_XEHPSDV(p) && IS_GT_STEP(__i915, since, until)) +/* + * DG2 hardware steppings are a bit unusual. The hardware design was forked + * to create two variants (G10 and G11) which have distinct workaround sets. 
+ * The G11 fork of the DG2 design resets the GT stepping back to "A0" for its + * first iteration, even though it's more similar to a G10 B0 stepping in terms + * of functionality and workarounds. However the display stepping does not + * reset in the same manner --- a specific stepping like "B0" has a consistent + * meaning regardless of whether it belongs to a G10 or G11 DG2. + * + * TLDR: All GT workarounds and stepping-specific logic must be applied in + * relation to a specific subplatform (G10 or G11), whereas display workarounds + * and stepping-specific logic will be applied with a general DG2-wide stepping + * number. + */ +#define IS_DG2_GT_STEP(__i915, variant, since, until) \ + (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \ + IS_GT_STEP(__i915, since, until)) + +#define IS_DG2_DISP_STEP(__i915, since, until) \ + (IS_DG2(__i915) && \ + IS_DISPLAY_STEP(__i915, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 0f8f14e405a1..70e161d5f15f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1006,6 +1006,22 @@ static const struct intel_device_info xehpsdv_info = { .require_force_probe = 1, }; +__maybe_unused +static const struct intel_device_info dg2_info = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + XE_LPD_FEATURES, + DGFX_FEATURES, + .graphics_rel = 55, + .media_rel = 55, + PLATFORM(INTEL_DG2), + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | + BIT(VECS0) | BIT(VECS1) | + BIT(VCS0) | BIT(VCS2), + .require_force_probe = 1, +}; + #undef PLATFORM /* diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index b750f9ded9d5..822a26b574a9 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -69,6 +69,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(ALDERLAKE_S), PLATFORM_NAME(ALDERLAKE_P), PLATFORM_NAME(XEHPSDV), + PLATFORM_NAME(DG2), }; #undef PLATFORM_NAME diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 4482a73a5816..4447b121c8c2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -89,6 +89,7 @@ enum intel_platform { INTEL_ALDERLAKE_S, INTEL_ALDERLAKE_P, INTEL_XEHPSDV, + INTEL_DG2, INTEL_MAX_PLATFORMS }; @@ -107,6 +108,10 @@ enum intel_platform { /* CNL/ICL */ #define INTEL_SUBPLATFORM_PORTF (0) +/* DG2 */ +#define INTEL_SUBPLATFORM_G10 0 +#define INTEL_SUBPLATFORM_G11 1 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index b99829bcb4f7..12c92e554253 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -108,6 +108,18 @@ static const struct intel_step_info xehpsdv_revids[] = { [0x8] = { .gt_step = STEP_C0 }, }; +static const struct intel_step_info dg2_g10_revid_step_tbl[] = { + [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, + [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 }, + [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, + [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, +}; + +static const struct intel_step_info dg2_g11_revid_step_tbl[] = { 
+ [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, + [0x4] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, +}; + void intel_step_init(struct drm_i915_private *i915) { const struct intel_step_info *revids = NULL; @@ -115,7 +127,13 @@ void intel_step_init(struct drm_i915_private *i915) int revid = INTEL_REVID(i915); struct intel_step_info step = {}; - if (IS_XEHPSDV(i915)) { + if (IS_DG2_G10(i915)) { + revids = dg2_g10_revid_step_tbl; + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); + } else if (IS_DG2_G11(i915)) { + revids = dg2_g11_revid_step_tbl; + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); + } else if (IS_XEHPSDV(i915)) { revids = xehpsdv_revids; size = ARRAY_SIZE(xehpsdv_revids); } else if (IS_ALDERLAKE_P(i915)) { -- cgit v1.2.3 From 442e049aedb2aa8dac55b073595b02cf5d13899c Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Wed, 21 Jul 2021 15:30:31 -0700 Subject: drm/i915/gen12: Use fuse info to enable SFC In Gen12 there are various fuse combinations and in each configuration vdbox engine may be connected to SFC depending on which engines are available, so we need to set the SFC capability based on fuse value from the hardware. Even numbered physical instance always have SFC, odd numbered physical instances have SFC only if previous even instance is fused off. v2: - Minor style & typo fixes (Tvrtko) - Drop an unwanted 'inline' (Tvrtko) Bspec: 48028 Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d561573ed98c..c1c96ced2a4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -442,6 +442,28 @@ void intel_engines_free(struct intel_gt *gt) } } +static +bool gen11_vdbox_has_sfc(struct drm_i915_private *i915, + unsigned int physical_vdbox, + unsigned int logical_vdbox, u16 vdbox_mask) +{ + /* + * In Gen11, only even numbered logical VDBOXes are hooked + * up to an SFC (Scaler & Format Converter) unit. + * In Gen12, Even numbered physical instance always are connected + * to an SFC. Odd numbered physical instances have SFC only if + * previous even instance is fused off. + */ + if (GRAPHICS_VER(i915) == 12) + return (physical_vdbox % 2 == 0) || + !(BIT(physical_vdbox - 1) & vdbox_mask); + else if (GRAPHICS_VER(i915) == 11) + return logical_vdbox % 2 == 0; + + MISSING_CASE(GRAPHICS_VER(i915)); + return false; +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -486,13 +508,9 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) continue; } - /* - * In Gen11, only even numbered logical VDBOXes are - * hooked up to an SFC (Scaler & Format Converter) unit. - * In TGL each VDBOX has access to an SFC. 
- */ - if (GRAPHICS_VER(i915) >= 12 || logical_vdbox++ % 2 == 0) + if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask)) gt->info.vdbox_sfc_access |= BIT(i); + logical_vdbox++; } drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt)); -- cgit v1.2.3 From 8f57f295c8952ed45aa7c1c6296d36ced08f85ed Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 21 Jul 2021 15:30:32 -0700 Subject: drm/i915/selftests: Allow for larger engine counts Increasing the engine count causes a couple of local array variables to exceed the kernel stack limit. So make them dynamic allocations instead. Signed-off-by: John Harrison Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/selftest_execlists.c | 10 ++++++-- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 32 +++++++++++++++++--------- 2 files changed, 29 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 73ddc6e14730..22a124b134b6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3561,12 +3561,16 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) #define BATCH BIT(0) { struct task_struct *tsk[I915_NUM_ENGINES] = {}; - struct preempt_smoke arg[I915_NUM_ENGINES]; + struct preempt_smoke *arg; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned long count; int err = 0; + arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL); + if (!arg) + return -ENOMEM; + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -3574,7 +3578,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - tsk[id] = kthread_run(smoke_crescendo_thread, &arg, + tsk[id] = kthread_run(smoke_crescendo_thread, arg, "igt/smoke:%d", id); if (IS_ERR(tsk[id])) { err = PTR_ERR(tsk[id]); @@ -3603,6 +3607,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, smoke->gt->info.num_engines, smoke->ncontext); + + kfree(arg); return 0; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 7ebc4edb8ecf..7a38ce40feb2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -1175,31 +1175,36 @@ live_gpu_reset_workarounds(void *arg) { struct intel_gt *gt = arg; intel_wakeref_t wakeref; - struct wa_lists lists; + struct wa_lists *lists; bool ok; if (!intel_has_gpu_reset(gt)) return 0; + lists = kzalloc(sizeof(*lists), GFP_KERNEL); + if (!lists) + return -ENOMEM; + pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(gt, &lists); + reference_lists_init(gt, lists); - ok = verify_wa_lists(gt, &lists, "before reset"); + ok = verify_wa_lists(gt, lists, "before reset"); if (!ok) goto out; intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); - ok = verify_wa_lists(gt, &lists, "after reset"); + ok = verify_wa_lists(gt, lists, "after reset"); out: - reference_lists_fini(gt, &lists); + reference_lists_fini(gt, lists); intel_runtime_pm_put(gt->uncore->rpm, 
wakeref); igt_global_reset_unlock(gt); + kfree(lists); return ok ? 0 : -ESRCH; } @@ -1214,16 +1219,20 @@ live_engine_reset_workarounds(void *arg) struct igt_spinner spin; struct i915_request *rq; intel_wakeref_t wakeref; - struct wa_lists lists; + struct wa_lists *lists; int ret = 0; if (!intel_has_reset_engine(gt)) return 0; + lists = kzalloc(sizeof(*lists), GFP_KERNEL); + if (!lists) + return -ENOMEM; + igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(gt, &lists); + reference_lists_init(gt, lists); for_each_engine(engine, gt, id) { bool ok; @@ -1235,7 +1244,7 @@ live_engine_reset_workarounds(void *arg) break; } - ok = verify_wa_lists(gt, &lists, "before reset"); + ok = verify_wa_lists(gt, lists, "before reset"); if (!ok) { ret = -ESRCH; goto err; @@ -1247,7 +1256,7 @@ live_engine_reset_workarounds(void *arg) goto err; } - ok = verify_wa_lists(gt, &lists, "after idle reset"); + ok = verify_wa_lists(gt, lists, "after idle reset"); if (!ok) { ret = -ESRCH; goto err; @@ -1282,7 +1291,7 @@ live_engine_reset_workarounds(void *arg) igt_spinner_end(&spin); igt_spinner_fini(&spin); - ok = verify_wa_lists(gt, &lists, "after busy reset"); + ok = verify_wa_lists(gt, lists, "after busy reset"); if (!ok) { ret = -ESRCH; goto err; @@ -1294,9 +1303,10 @@ err: break; } - reference_lists_fini(gt, &lists); + reference_lists_fini(gt, lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); + kfree(lists); igt_flush_test(gt->i915); -- cgit v1.2.3 From 50a9ea0843da815bf0e05fad4c9d80f89ab5349e Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Wed, 21 Jul 2021 15:30:33 -0700 Subject: drm/i915/xehp: Handle new device context ID format Xe_HP changes the format of the context ID from past platforms. 
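In practical terms the software context ID field widens and a different all-ones value is reserved to mean there is no valid context. A toy helper, with the reserved values taken from the lrc_reg.h comments in the hunks below (the function itself is invented for illustration):

/* Illustrative only; not a real driver helper. */
static inline bool example_sw_ctx_id_valid(u32 id, bool xehp)
{
	/* Gen12 reserves 0x7FF to mean "idle"; Xe_HP reserves 0xFFFF */
	return xehp ? id != 0xFFFF : id != 0x7FF;
}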
Signed-off-by: Stuart Summers Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-9-matthew.d.roper@intel.com --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 74 ++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_lrc.c | 8 +++ drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 2 + drivers/gpu/drm/i915/i915_perf.c | 29 ++++++--- drivers/gpu/drm/i915/i915_reg.h | 5 ++ 5 files changed, 97 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 56e25090da67..87cedaeb4bf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -153,6 +153,12 @@ #define GEN12_CSB_CTX_VALID(csb_dw) \ (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) +#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE BIT(1) /* upper csb dword */ +#define XEHP_CSB_SW_CTX_ID_MASK GENMASK(31, 10) +#define XEHP_IDLE_CTX_ID 0xFFFF +#define XEHP_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ @@ -478,6 +484,16 @@ __execlists_schedule_in(struct i915_request *rq) /* Use a fixed tag for OA and friends */ GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; + } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + /* We don't need a strict matching tag, just different values */ + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); + } else { /* We don't need a strict matching tag, just different values */ unsigned int tag = __ffs(engine->context_tag); @@ -588,8 +604,14 @@ static void __execlists_schedule_out(struct i915_request * const rq, intel_engine_add_retire(engine, ce->timeline); ccid = ce->lrc.ccid; - ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; - ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; + ccid &= XEHP_MAX_CONTEXT_HW_ID; + } else { + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + } + if (ccid < BITS_PER_LONG) { GEM_BUG_ON(ccid == 0); GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); @@ -1648,13 +1670,24 @@ static void invalidate_csb_entries(const u64 *first, const u64 *last) * bits 44-46: reserved * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from + * + * Xe_HP csb shuffles things around compared to TGL: + * + * bits 0-3: context switch detail (same possible values as TGL) + * bits 4-9: engine instance + * bits 10-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bit 32: semaphore wait mode (poll or signal), Only valid when + * switch detail is set to "wait on semaphore" + * bit 33: switched to new queue + * bits 34-41: wait detail (for switch detail 1 to 4) + * bits 42-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from */ -static bool gen12_csb_parse(const u64 csb) +static inline bool +__gen12_csb_parse(bool ctx_to_valid, 
bool ctx_away_valid, bool new_queue, + u8 switch_detail) { - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); - bool new_queue = - lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; - /* * The context switch detail is not guaranteed to be 5 when a preemption * occurs, so we can't just check for that. The check below works for @@ -1663,7 +1696,7 @@ static bool gen12_csb_parse(const u64 csb) * would require some extra handling, but we don't support that. */ if (!ctx_away_valid || new_queue) { - GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb))); + GEM_BUG_ON(!ctx_to_valid); return true; } @@ -1672,10 +1705,26 @@ static bool gen12_csb_parse(const u64 csb) * context switch on an unsuccessful wait instruction since we always * use polling mode. */ - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); + GEM_BUG_ON(switch_detail); return false; } +static bool xehp_csb_parse(const u64 csb) +{ + return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ + XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ + upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, + GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))); +} + +static bool gen12_csb_parse(const u64 csb) +{ + return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ + GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ + lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, + GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); +} + static bool gen8_csb_parse(const u64 csb) { return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); @@ -1840,7 +1889,9 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", head, upper_32_bits(csb), lower_32_bits(csb)); - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + promote = xehp_csb_parse(csb); + else if (GRAPHICS_VER(engine->i915) >= 12) promote = gen12_csb_parse(csb); else promote = gen8_csb_parse(csb); @@ -3323,7 +3374,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) execlists->csb_size = GEN11_CSB_ENTRIES; engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); - if (GRAPHICS_VER(engine->i915) >= 11) { + if (GRAPHICS_VER(engine->i915) >= 11 && + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) { execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 8ada1afe3d22..7f8fe6726504 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1101,6 +1101,14 @@ setup_indirect_ctx_bb(const struct intel_context *ce, * bits 55-60: SW counter * bits 61-63: engine class * + * On Xe_HP, the upper dword of the descriptor has a new format: + * + * bits 32-37: virtual function number + * bit 38: mbz, reserved for use by hardware + * bits 39-54: SW context ID + * bits 55-57: reserved + * bits 58-63: SW counter + * * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 41e5350a7a05..9548f4ade068 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -91,5 +91,7 @@ #define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ /* in Gen12 ID 0x7FF is reserved to indicate idle */ #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) +/* in Xe_HP ID 0xFFFF is reserved to indicate "invalid context" */ +#define XEHP_MAX_CONTEXT_HW_ID 0xFFFF #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 48ddb363b3bd..838cc14c2f24 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1284,17 +1284,26 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) break; case 11: - case 12: { - stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - /* - * Pick an unused context id - * 0 - BITS_PER_LONG are used by other contexts - * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context - */ - stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + case 12: + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) { + stream->specific_ctx_id_mask = + ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = + (XEHP_MAX_CONTEXT_HW_ID - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + } else { + stream->specific_ctx_id_mask = + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + /* + * Pick an unused context id + * 0 - BITS_PER_LONG are used by other contexts + * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context + */ + stream->specific_ctx_id = + (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + } break; - } default: MISSING_CASE(GRAPHICS_VER(ce->engine->i915)); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 94fde5ca26ae..b956e3a63ca9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4158,6 +4158,11 @@ enum { #define GEN11_ENGINE_INSTANCE_SHIFT 48 #define GEN11_ENGINE_INSTANCE_WIDTH 6 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 +#define XEHP_SW_COUNTER_SHIFT 58 +#define XEHP_SW_COUNTER_WIDTH 6 + #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 -- cgit v1.2.3 From 7fc37efd8fa0678ea2cc4c3c5e881002010a3123 Mon Sep 17 00:00:00 2001 From: Prathap Kumar Valsan Date: Wed, 21 Jul 2021 15:30:34 -0700 Subject: drm/i915/xehp: New engine context offsets The layout of some engine contexts has changed on Xe_HP. Define the new offsets. 
Bspec: 45585, 46256 Signed-off-by: Prathap Kumar Valsan Signed-off-by: Ramalingam C Signed-off-by: Venkata Ramana Nayana Signed-off-by: Akeem G Abodunrin Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-10-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 65 +++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7f8fe6726504..c3f5bec8ae15 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -484,6 +484,47 @@ static const u8 gen12_rcs_offsets[] = { END }; +static const u8 xehp_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -502,7 +543,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return xehp_rcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 11) return gen11_rcs_offsets; @@ -522,7 +565,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x70; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0x60; else if (GRAPHICS_VER(engine->i915) >= 9) return 0x54; @@ -534,7 +579,9 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x84; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0x74; else if (GRAPHICS_VER(engine->i915) >= 9) return 0x68; @@ -578,10 +625,16 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - if (engine->class != RENDER_CLASS) - return -1; - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + /* + * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL + * simply to match the RCS context image layout. + */ + return 0xc6; + else if (engine->class != RENDER_CLASS) + return -1; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0xb6; else if (GRAPHICS_VER(engine->i915) >= 11) return 0xaa; -- cgit v1.2.3 From 56bc88745e731ff3830d2165f3a404d54eaf8287 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:44 -0700 Subject: drm/i915/guc: Add new GuC interface defines and structures Add new GuC interface defines and structures while maintaining old ones in parallel. 
Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 14 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 42 ++++++++++++++++++++++++ 2 files changed, 56 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 2d6198e63ebe..57e18babdf4b 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -124,10 +124,24 @@ enum intel_guc_action { INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000, + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, + INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, + INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, + INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, + INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, + INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 617ec601648d..3e060d5958cc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -17,6 +17,9 @@ #include "abi/guc_communication_ctb_abi.h" #include "abi/guc_messages_abi.h" +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 @@ -26,6 +29,9 @@ #define GUC_MAX_STAGE_DESCRIPTORS 1024 #define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS +#define GUC_MAX_LRC_DESCRIPTORS 65535 +#define GUC_INVALID_LRC_ID GUC_MAX_LRC_DESCRIPTORS + #define GUC_RENDER_ENGINE 0 #define GUC_VIDEO_ENGINE 1 #define GUC_BLITTER_ENGINE 2 @@ -237,6 +243,42 @@ struct guc_stage_desc { u64 desc_private; } __packed; +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) + +#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 +#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000 + +/* Preempt to idle on quantum expiry */ +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0) + +/* + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. + */ +struct guc_lrc_desc { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SPLC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; + u32 priority; + u32 process_desc; + u32 wq_addr; + u32 wq_size; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. 
(in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; +} __packed; + #define GUC_POWER_UNSPECIFIED 0 #define GUC_POWER_D0 1 #define GUC_POWER_D1 2 -- cgit v1.2.3 From 7518d9b67cf5ccf8f255bc45e18aa9c3d479f4b6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:45 -0700 Subject: drm/i915/guc: Remove GuC stage descriptor, add LRC descriptor Remove old GuC stage descriptor, add LRC descriptor which will be used by the new GuC interface implemented in this patch series. v2: (John Harrison) - s/lrc/LRC/g Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 +- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 65 -------------------- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 72 ++++++++--------------- 3 files changed, 25 insertions(+), 116 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 72e4653222e2..2625d2d5959f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -43,8 +43,8 @@ struct intel_guc { struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; + struct i915_vma *lrc_desc_pool; + void *lrc_desc_pool_vaddr; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 3e060d5958cc..3489b390ae77 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -26,9 +26,6 @@ #define GUC_CLIENT_PRIORITY_NORMAL 3 #define GUC_CLIENT_PRIORITY_NUM 4 -#define GUC_MAX_STAGE_DESCRIPTORS 1024 -#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS - #define GUC_MAX_LRC_DESCRIPTORS 65535 #define GUC_INVALID_LRC_ID GUC_MAX_LRC_DESCRIPTORS @@ -181,68 +178,6 @@ struct guc_process_desc { u32 reserved[30]; } __packed; -/* engine id and context id is packed into guc_execlist_context.context_id*/ -#define GUC_ELC_CTXID_OFFSET 0 -#define GUC_ELC_ENGINE_OFFSET 29 - -/* The execlist context including software and HW information */ -struct guc_execlist_context { - u32 context_desc; - u32 context_id; - u32 ring_status; - u32 ring_lrca; - u32 ring_begin; - u32 ring_end; - u32 ring_next_free_location; - u32 ring_current_tail_pointer_value; - u8 engine_state_submit_value; - u8 engine_state_wait_value; - u16 pagefault_count; - u16 engine_submit_queue_count; -} __packed; - -/* - * This structure describes a stage set arranged for a particular communication - * between uKernel (GuC) and Driver (KMD). Technically, this is known as a - * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" - * to avoid confusion with all the other things already named "context" in the - * driver. A static pool of these descriptors are stored inside a GEM object - * (stage_desc_pool) which is held for the entire lifetime of our interaction - * with the GuC, being allocated before the GuC is loaded with its firmware. 
- */ -struct guc_stage_desc { - u32 sched_common_area; - u32 stage_id; - u32 pas_id; - u8 engines_used; - u64 db_trigger_cpu; - u32 db_trigger_uk; - u64 db_trigger_phy; - u16 db_id; - - struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM]; - - u8 attribute; - - u32 priority; - - u32 wq_sampled_tail_offset; - u32 wq_total_submit_enqueues; - - u32 process_desc; - u32 wq_addr; - u32 wq_size; - - u32 engine_presence; - - u8 engine_suspended; - - u8 reserved0[3]; - u64 reserved1[1]; - - u64 desc_private; -} __packed; - #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) #define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e9c237b18692..ed5d8ab3624f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -65,57 +65,35 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) +/* Future patches will use this function */ +__maybe_unused +static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) { - struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; + struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; - return &base[id]; -} - -static int guc_stage_desc_pool_create(struct intel_guc *guc) -{ - u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS); + GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); - return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, - &guc->stage_desc_pool_vaddr); + return &base[index]; } -static void guc_stage_desc_pool_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); -} - -/* - * Initialise/clear the stage descriptor shared with the GuC firmware. - * - * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures related to work submission (process descriptor, write queue, - * etc). 
- */ -static void guc_stage_desc_init(struct intel_guc *guc) +static int guc_lrc_desc_pool_create(struct intel_guc *guc) { - struct guc_stage_desc *desc; - - /* we only use 1 stage desc, so hardcode it to 0 */ - desc = __get_stage_desc(guc, 0); - memset(desc, 0, sizeof(*desc)); - - desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | - GUC_STAGE_DESC_ATTR_KERNEL; + u32 size; + int ret; - desc->stage_id = 0; - desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * + GUC_MAX_LRC_DESCRIPTORS); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, + (void **)&guc->lrc_desc_pool_vaddr); + if (ret) + return ret; - desc->wq_size = GUC_WQ_SIZE; + return 0; } -static void guc_stage_desc_fini(struct intel_guc *guc) +static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) { - struct guc_stage_desc *desc; - - desc = __get_stage_desc(guc, 0); - memset(desc, 0, sizeof(*desc)); + i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); } static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) @@ -410,26 +388,25 @@ int intel_guc_submission_init(struct intel_guc *guc) { int ret; - if (guc->stage_desc_pool) + if (guc->lrc_desc_pool) return 0; - ret = guc_stage_desc_pool_create(guc); + ret = guc_lrc_desc_pool_create(guc); if (ret) return ret; /* * Keep static analysers happy, let them know that we allocated the * vma after testing that it didn't exist earlier. */ - GEM_BUG_ON(!guc->stage_desc_pool); + GEM_BUG_ON(!guc->lrc_desc_pool); return 0; } void intel_guc_submission_fini(struct intel_guc *guc) { - if (guc->stage_desc_pool) { - guc_stage_desc_pool_destroy(guc); - } + if (guc->lrc_desc_pool) + guc_lrc_desc_pool_destroy(guc); } static int guc_context_alloc(struct intel_context *ce) @@ -695,7 +672,6 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { - guc_stage_desc_init(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -705,8 +681,6 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(gt->awake); /* GT should be parked first */ /* Note: By the time we're here, GuC may have already been reset */ - - guc_stage_desc_fini(guc); } static bool __guc_submission_selected(struct intel_guc *guc) -- cgit v1.2.3 From 27213d79b384d5b57d2dfa58a74b47d616f249ec Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:46 -0700 Subject: drm/i915/guc: Add LRC descriptor context lookup array Add LRC descriptor context lookup array which can resolve the intel_context from the LRC descriptor index. In addition to lookup, it can determine if the LRC descriptor context is currently registered with the GuC by checking if an entry for a descriptor index is present. Future patches in the series will make use of this array. 
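Illustration of the lookup scheme, hedged as a plain userspace sketch rather than driver code: a pointer array stands in for the xarray, and a non-NULL entry at an index is what "registered with the GuC" means here. All names (toy_context, lrc_lookup, MAX_LRC) are invented for the example.

#include <stdio.h>

#define MAX_LRC 16	/* stands in for GUC_MAX_LRC_DESCRIPTORS */

struct toy_context { int id; };

/* A non-NULL entry doubles as "this descriptor index is registered". */
static struct toy_context *lrc_lookup[MAX_LRC];

static void set_registered(unsigned int idx, struct toy_context *ce)
{
	lrc_lookup[idx] = ce;
}

static struct toy_context *get_context(unsigned int idx)
{
	return idx < MAX_LRC ? lrc_lookup[idx] : NULL;
}

static int is_registered(unsigned int idx)
{
	return get_context(idx) != NULL;
}

int main(void)
{
	struct toy_context ce = { .id = 3 };

	set_registered(3, &ce);
	printf("registered(3)=%d registered(4)=%d\n",
	       is_registered(3), is_registered(4));
	return 0;
}
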
v2: (Michal) - "linux/xarray.h" -> - s/lrc/LRC (John H) - Fix commit message Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 5 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 32 +++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2625d2d5959f..35783558d261 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -6,6 +6,8 @@ #ifndef _INTEL_GUC_H_ #define _INTEL_GUC_H_ +#include + #include "intel_uncore.h" #include "intel_guc_fw.h" #include "intel_guc_fwif.h" @@ -46,6 +48,9 @@ struct intel_guc { struct i915_vma *lrc_desc_pool; void *lrc_desc_pool_vaddr; + /* guc_id to intel_context lookup */ + struct xarray context_lookup; + /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ed5d8ab3624f..23a94a896a0b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -65,8 +65,6 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -/* Future patches will use this function */ -__maybe_unused static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) { struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; @@ -76,6 +74,15 @@ static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) return &base[index]; } +static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) +{ + struct intel_context *ce = xa_load(&guc->context_lookup, id); + + GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); + + return ce; +} + static int guc_lrc_desc_pool_create(struct intel_guc *guc) { u32 size; @@ -96,6 +103,25 @@ static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); } +static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) +{ + struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); + + memset(desc, 0, sizeof(*desc)); + xa_erase_irq(&guc->context_lookup, id); +} + +static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) +{ + return __get_context(guc, id); +} + +static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, + struct intel_context *ce) +{ + xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); +} + static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { /* Leaving stub as this function will be used in future patches */ @@ -400,6 +426,8 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->lrc_desc_pool); + xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); + return 0; } -- cgit v1.2.3 From 925dc1cf58edcd6fa239c2b97f1aedb795f214d0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:47 -0700 Subject: drm/i915/guc: Implement GuC submission tasklet Implement GuC submission tasklet for new interface. The new GuC interface uses H2G to submit contexts to the GuC. Since H2G use a single channel, a single tasklet is used for the submission path. 
Also the per engine interrupt handler has been updated to disable the rescheduling of the physical engine tasklet, when using GuC scheduling, as the physical engine tasklet is no longer used. In this patch the field, guc_id, has been added to intel_context and is not assigned. Patches later in the series will assign this value. v2: (John Harrison) - Clean up some comments v3: (John Harrison) - More comment cleanups Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-5-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 9 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 231 +++++++++++----------- 3 files changed, 127 insertions(+), 117 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 90026c177105..6d99631d19b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -137,6 +137,15 @@ struct intel_context { struct intel_sseu sseu; u8 wa_bb_page; /* if set, page num reserved for context workarounds */ + + /* GuC scheduling state flags that do not require a lock. */ + atomic_t guc_sched_state_no_lock; + + /* + * GuC LRC descriptor ID - Not assigned in this patch but future patches + * in the series will. + */ + u16 guc_id; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 35783558d261..8c7b92f699f1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -30,6 +30,10 @@ struct intel_guc { struct intel_guc_log log; struct intel_guc_ct ct; + /* Global engine used to submit requests to GuC */ + struct i915_sched_engine *sched_engine; + struct i915_request *stalled_request; + /* intel_guc_recv interrupt related state */ spinlock_t irq_lock; unsigned int msg_enabled_mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 23a94a896a0b..ca0717166a27 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -60,6 +60,31 @@ #define GUC_REQUEST_SIZE 64 /* bytes */ +/* + * Below is a set of functions which control the GuC scheduling state which do + * not require a lock as all state transitions are mutually exclusive. i.e. It + * is not possible for the context pinning code and submission, for the same + * context, to be executing simultaneously. We still need an atomic as it is + * possible for some of the bits to changing at the same time though. 
+ */ +#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) +static inline bool context_enabled(struct intel_context *ce) +{ + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_ENABLED); +} + +static inline void set_context_enabled(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_enabled(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, + &ce->guc_sched_state_no_lock); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -122,37 +147,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); } -static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) +static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - /* Leaving stub as this function will be used in future patches */ -} + int err; + struct intel_context *ce = rq->context; + u32 action[3]; + int len = 0; + bool enabled = context_enabled(ce); -/* - * When we're doing submissions using regular execlists backend, writing to - * ELSP from CPU side is enough to make sure that writes to ringbuffer pages - * pinned in mappable aperture portion of GGTT are visible to command streamer. - * Writes done by GuC on our behalf are not guaranteeing such ordering, - * therefore, to ensure the flush, we're issuing a POSTING READ. - */ -static void flush_ggtt_writes(struct i915_vma *vma) -{ - if (i915_vma_is_map_and_fenceable(vma)) - intel_uncore_posting_read_fw(vma->vm->gt->uncore, - GUC_STATUS); -} + if (!enabled) { + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; + action[len++] = ce->guc_id; + action[len++] = GUC_CONTEXT_ENABLE; + } else { + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; + action[len++] = ce->guc_id; + } -static void guc_submit(struct intel_engine_cs *engine, - struct i915_request **out, - struct i915_request **end) -{ - struct intel_guc *guc = &engine->gt->uc.guc; + err = intel_guc_send_nb(guc, action, len); - do { - struct i915_request *rq = *out++; + if (!enabled && !err) + set_context_enabled(ce); - flush_ggtt_writes(rq->ring->vma); - guc_add_request(guc, rq); - } while (out != end); + return err; } static inline int rq_prio(const struct i915_request *rq) @@ -160,125 +177,88 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } -static struct i915_request *schedule_in(struct i915_request *rq, int idx) +static int guc_dequeue_one_context(struct intel_guc *guc) { - trace_i915_request_in(rq, idx); - - /* - * Currently we are not tracking the rq->context being inflight - * (ce->inflight = rq->engine). It is only used by the execlists - * backend at the moment, a similar counting strategy would be - * required if we generalise the inflight tracking. 
- */ - - __intel_gt_pm_get(rq->engine->gt); - return i915_request_get(rq); -} - -static void schedule_out(struct i915_request *rq) -{ - trace_i915_request_out(rq); - - intel_gt_pm_put_async(rq->engine->gt); - i915_request_put(rq); -} - -static void __guc_dequeue(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_sched_engine * const sched_engine = engine->sched_engine; - struct i915_request **first = execlists->inflight; - struct i915_request ** const last_port = first + execlists->port_mask; - struct i915_request *last = first[0]; - struct i915_request **port; + struct i915_sched_engine * const sched_engine = guc->sched_engine; + struct i915_request *last = NULL; bool submit = false; struct rb_node *rb; + int ret; lockdep_assert_held(&sched_engine->lock); - if (last) { - if (*++first) - return; - - last = NULL; + if (guc->stalled_request) { + submit = true; + last = guc->stalled_request; + goto resubmit; } - /* - * We write directly into the execlists->inflight queue and don't use - * the execlists->pending queue, as we don't have a distinct switch - * event. - */ - port = first; while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; priolist_for_each_request_consume(rq, rn, p) { - if (last && rq->context != last->context) { - if (port == last_port) - goto done; - - *port = schedule_in(last, - port - execlists->inflight); - port++; - } + if (last && rq->context != last->context) + goto done; list_del_init(&rq->sched.link); + __i915_request_submit(rq); - submit = true; + + trace_i915_request_in(rq, 0); last = rq; + submit = true; } rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: - sched_engine->queue_priority_hint = - rb ? to_priolist(rb)->priority : INT_MIN; if (submit) { - *port = schedule_in(last, port - execlists->inflight); - *++port = NULL; - guc_submit(engine, first, port); + last->context->lrc_reg_state[CTX_RING_TAIL] = + intel_ring_set_tail(last->ring, last->tail); +resubmit: + /* + * We only check for -EBUSY here even though it is possible for + * -EDEADLK to be returned. If -EDEADLK is returned, the GuC has + * died and a full GT reset needs to be done. The hangcheck will + * eventually detect that the GuC has died and trigger this + * reset so no need to handle -EDEADLK here. 
+ */ + ret = guc_add_request(guc, last); + if (ret == -EBUSY) { + tasklet_schedule(&sched_engine->tasklet); + guc->stalled_request = last; + return false; + } } - execlists->active = execlists->inflight; + + guc->stalled_request = NULL; + return submit; } static void guc_submission_tasklet(struct tasklet_struct *t) { struct i915_sched_engine *sched_engine = from_tasklet(sched_engine, t, tasklet); - struct intel_engine_cs * const engine = sched_engine->private_data; - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request **port, *rq; unsigned long flags; + bool loop; - spin_lock_irqsave(&engine->sched_engine->lock, flags); - - for (port = execlists->inflight; (rq = *port); port++) { - if (!i915_request_completed(rq)) - break; - - schedule_out(rq); - } - if (port != execlists->inflight) { - int idx = port - execlists->inflight; - int rem = ARRAY_SIZE(execlists->inflight) - idx; - memmove(execlists->inflight, port, rem * sizeof(*port)); - } + spin_lock_irqsave(&sched_engine->lock, flags); - __guc_dequeue(engine); + do { + loop = guc_dequeue_one_context(sched_engine->private_data); + } while (loop); - i915_sched_engine_reset_on_empty(engine->sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock_irqrestore(&engine->sched_engine->lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) { - if (iir & GT_RENDER_USER_INTERRUPT) { + if (iir & GT_RENDER_USER_INTERRUPT) intel_engine_signal_breadcrumbs(engine); - tasklet_hi_schedule(&engine->sched_engine->tasklet); - } } static void guc_reset_prepare(struct intel_engine_cs *engine) @@ -349,6 +329,10 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; + /* Can be called during boot if GuC fails to load */ + if (!engine->gt) + return; + ENGINE_TRACE(engine, "\n"); /* @@ -433,8 +417,11 @@ int intel_guc_submission_init(struct intel_guc *guc) void intel_guc_submission_fini(struct intel_guc *guc) { - if (guc->lrc_desc_pool) - guc_lrc_desc_pool_destroy(guc); + if (!guc->lrc_desc_pool) + return; + + guc_lrc_desc_pool_destroy(guc); + i915_sched_engine_put(guc->sched_engine); } static int guc_context_alloc(struct intel_context *ce) @@ -499,32 +486,32 @@ static int guc_request_alloc(struct i915_request *request) return 0; } -static inline void queue_request(struct intel_engine_cs *engine, +static inline void queue_request(struct i915_sched_engine *sched_engine, struct i915_request *rq, int prio) { GEM_BUG_ON(!list_empty(&rq->sched.link)); list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine->sched_engine, prio)); + i915_sched_lookup_priolist(sched_engine, prio)); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static void guc_submit_request(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct i915_sched_engine *sched_engine = rq->engine->sched_engine; unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->sched_engine->lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); - queue_request(engine, rq, rq_prio(rq)); + queue_request(sched_engine, rq, rq_prio(rq)); - GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); + GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); GEM_BUG_ON(list_empty(&rq->sched.link)); - tasklet_hi_schedule(&engine->sched_engine->tasklet); + tasklet_hi_schedule(&sched_engine->tasklet); - spin_unlock_irqrestore(&engine->sched_engine->lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -602,8 +589,6 @@ static void guc_release(struct intel_engine_cs *engine) { engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ - tasklet_kill(&engine->sched_engine->tasklet); - intel_engine_cleanup_common(engine); lrc_fini_wa_ctx(engine); } @@ -674,6 +659,7 @@ static inline void guc_default_irqs(struct intel_engine_cs *engine) int intel_guc_submission_setup(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; + struct intel_guc *guc = &engine->gt->uc.guc; /* * The setup relies on several assumptions (e.g. irqs always enabled) @@ -681,7 +667,18 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) */ GEM_BUG_ON(GRAPHICS_VER(i915) < 11); - tasklet_setup(&engine->sched_engine->tasklet, guc_submission_tasklet); + if (!guc->sched_engine) { + guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!guc->sched_engine) + return -ENOMEM; + + guc->sched_engine->schedule = i915_schedule; + guc->sched_engine->private_data = guc; + tasklet_setup(&guc->sched_engine->tasklet, + guc_submission_tasklet); + } + i915_sched_engine_put(engine->sched_engine); + engine->sched_engine = i915_sched_engine_get(guc->sched_engine); guc_default_vfuncs(engine); guc_default_irqs(engine); -- cgit v1.2.3 From 2330923e92478ad80417dac114b80d257096e4be Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:48 -0700 Subject: drm/i915/guc: Add bypass tasklet submission path to GuC Add bypass tasklet submission path to GuC. The tasklet is only used if H2G channel has backpresure. 
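The gist of the bypass, as a hedged userspace sketch (the H2G channel, the queue and the -EBUSY backpressure are simulated; direct_submit, queue_for_tasklet and have_stalled are invented names, not driver symbols):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Pretend H2G channel: reports backpressure with -EBUSY when "full". */
static bool h2g_full;
static bool have_stalled;
static bool queue_empty = true;

static int direct_submit(int rq)
{
	if (h2g_full)
		return -EBUSY;
	printf("request %d sent straight to the GuC\n", rq);
	return 0;
}

static void queue_for_tasklet(int rq)
{
	queue_empty = false;
	printf("request %d queued behind the tasklet\n", rq);
}

/* Bypass the tasklet only when nothing is queued and nothing is stalled. */
static void submit(int rq)
{
	if (have_stalled || !queue_empty)
		queue_for_tasklet(rq);
	else if (direct_submit(rq) == -EBUSY) {
		have_stalled = true;	/* the tasklet will retry it later */
		printf("request %d stalled, tasklet scheduled\n", rq);
	}
}

int main(void)
{
	submit(1);		/* empty queue, no backpressure: bypass */
	h2g_full = true;
	submit(2);		/* backpressure: stalls, tasklet takes over */
	submit(3);		/* stalled request pending: must queue */
	return 0;
}
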
Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 37 ++++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ca0717166a27..53b4a5eb4a85 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -172,6 +172,12 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) return err; } +static inline void guc_set_lrc_tail(struct i915_request *rq) +{ + rq->context->lrc_reg_state[CTX_RING_TAIL] = + intel_ring_set_tail(rq->ring, rq->tail); +} + static inline int rq_prio(const struct i915_request *rq) { return rq->sched.attr.priority; @@ -215,8 +221,7 @@ static int guc_dequeue_one_context(struct intel_guc *guc) } done: if (submit) { - last->context->lrc_reg_state[CTX_RING_TAIL] = - intel_ring_set_tail(last->ring, last->tail); + guc_set_lrc_tail(last); resubmit: /* * We only check for -EBUSY here even though it is possible for @@ -496,20 +501,36 @@ static inline void queue_request(struct i915_sched_engine *sched_engine, set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } +static int guc_bypass_tasklet_submit(struct intel_guc *guc, + struct i915_request *rq) +{ + int ret; + + __i915_request_submit(rq); + + trace_i915_request_in(rq, 0); + + guc_set_lrc_tail(rq); + ret = guc_add_request(guc, rq); + if (ret == -EBUSY) + guc->stalled_request = rq; + + return ret; +} + static void guc_submit_request(struct i915_request *rq) { struct i915_sched_engine *sched_engine = rq->engine->sched_engine; + struct intel_guc *guc = &rq->engine->gt->uc.guc; unsigned long flags; /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&sched_engine->lock, flags); - queue_request(sched_engine, rq, rq_prio(rq)); - - GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); - GEM_BUG_ON(list_empty(&rq->sched.link)); - - tasklet_hi_schedule(&sched_engine->tasklet); + if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine)) + queue_request(sched_engine, rq, rq_prio(rq)); + else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) + tasklet_hi_schedule(&sched_engine->tasklet); spin_unlock_irqrestore(&sched_engine->lock, flags); } -- cgit v1.2.3 From 3a4cdf1982f05d1da434eb3d777554ea6de6769a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:49 -0700 Subject: drm/i915/guc: Implement GuC context operations for new inteface Implement GuC context operations which includes GuC specific operations alloc, pin, unpin, and destroy. 
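Rough shape of the change, sketched with invented names (toy_context_ops and the toy_* callbacks stand in for the GuC-specific alloc/pin/unpin/destroy hooks; this only illustrates dispatching context operations through a vtable, it is not the driver code):

#include <stdio.h>

struct toy_context;

/* Backend-specific context operations dispatched through a vtable. */
struct toy_context_ops {
	int  (*alloc)(struct toy_context *ce);
	int  (*pin)(struct toy_context *ce);
	void (*unpin)(struct toy_context *ce);
	void (*destroy)(struct toy_context *ce);
};

struct toy_context {
	const struct toy_context_ops *ops;
	int pinned;
};

static int toy_alloc(struct toy_context *ce)    { (void)ce; puts("alloc"); return 0; }
static int toy_pin(struct toy_context *ce)      { ce->pinned = 1; puts("pin"); return 0; }
static void toy_unpin(struct toy_context *ce)   { ce->pinned = 0; puts("unpin"); }
static void toy_destroy(struct toy_context *ce) { (void)ce; puts("destroy"); }

static const struct toy_context_ops toy_guc_ops = {
	.alloc   = toy_alloc,
	.pin     = toy_pin,
	.unpin   = toy_unpin,
	.destroy = toy_destroy,
};

int main(void)
{
	struct toy_context ce = { .ops = &toy_guc_ops };

	ce.ops->alloc(&ce);
	ce.ops->pin(&ce);
	ce.ops->unpin(&ce);
	ce.ops->destroy(&ce);
	return 0;
}
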
v2: (Daniel Vetter) - Use msleep_interruptible rather than cond_resched in busy loop (Michal) - Remove C++ style comment v3: (Matthew Brost) - Drop GUC_ID_START (John Harrison) - Fix a bunch of typos - Use drm_err rather than drm_dbg for G2H errors (Daniele) - Fix ;; typo - Clean up sched state functions - Add lockdep for guc_id functions - Don't call __release_guc_id when guc_id is invalid - Use MISSING_CASE - Add comment in guc_context_pin - Use shorter path to rpm (Daniele / CI) - Don't call release_guc_id on an invalid guc_id in destroy v4: (Daniel Vetter) - Add FIXME comment Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-7-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 5 + drivers/gpu/drm/i915/gt/intel_context_types.h | 22 +- drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 1 - drivers/gpu/drm/i915/gt/uc/intel_guc.h | 47 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 670 ++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/i915_request.c | 1 + 8 files changed, 696 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index c1338441cc1d..090c13287d3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -384,6 +384,11 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) mutex_init(&ce->pin_mutex); + spin_lock_init(&ce->guc_state.lock); + + ce->guc_id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id_link); + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 6d99631d19b9..606c480aec26 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -96,6 +96,7 @@ struct intel_context { #define CONTEXT_BANNED 6 #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 +#define CONTEXT_LRCA_DIRTY 9 struct { u64 timeout_us; @@ -138,14 +139,29 @@ struct intel_context { u8 wa_bb_page; /* if set, page num reserved for context workarounds */ + struct { + /** lock: protects everything in guc_state */ + spinlock_t lock; + /** + * sched_state: scheduling state of this context using GuC + * submission + */ + u8 sched_state; + } guc_state; + /* GuC scheduling state flags that do not require a lock. */ atomic_t guc_sched_state_no_lock; + /* GuC LRC descriptor ID */ + u16 guc_id; + + /* GuC LRC descriptor reference count */ + atomic_t guc_id_ref; + /* - * GuC LRC descriptor ID - Not assigned in this patch but future patches - * in the series will. 
+ * GuC ID link - in list when unpinned but guc_id still valid in GuC */ - u16 guc_id; + struct list_head guc_id_link; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 9548f4ade068..f785d0ed238f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -87,7 +87,6 @@ #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) #define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ #define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ /* in Gen12 ID 0x7FF is reserved to indicate idle */ #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8c7b92f699f1..7fd6c3e343e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -7,6 +7,7 @@ #define _INTEL_GUC_H_ #include +#include #include "intel_uncore.h" #include "intel_guc_fw.h" @@ -44,6 +45,14 @@ struct intel_guc { void (*disable)(struct intel_guc *guc); } interrupts; + /* + * contexts_lock protects the pool of free guc ids and a linked list of + * guc ids available to be stolen + */ + spinlock_t contexts_lock; + struct ida guc_ids; + struct list_head guc_id_list; + bool submission_selected; struct i915_vma *ads_vma; @@ -101,6 +110,41 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, response_buf, response_buf_size, 0); } +static inline int intel_guc_send_busy_loop(struct intel_guc *guc, + const u32 *action, + u32 len, + bool loop) +{ + int err; + unsigned int sleep_period_ms = 1; + bool not_atomic = !in_atomic() && !irqs_disabled(); + + /* + * FIXME: Have caller pass in if we are in an atomic context to avoid + * using in_atomic(). It is likely safe here as we check for irqs + * disabled which basically all the spin locks in the i915 do but + * regardless this should be cleaned up. 
+ */ + + /* No sleeping with spin locks, just busy loop */ + might_sleep_if(loop && not_atomic); + +retry: + err = intel_guc_send_nb(guc, action, len); + if (unlikely(err == -EBUSY && loop)) { + if (likely(not_atomic)) { + if (msleep_interruptible(sleep_period_ms)) + return -EINTR; + sleep_period_ms = sleep_period_ms << 1; + } else { + cpu_relax(); + } + goto retry; + } + + return err; +} + static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) { intel_guc_ct_event_handler(&guc->ct); @@ -202,6 +246,9 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine); +int intel_guc_deregister_done_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); + void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 83ec60ea3f89..28ff82c5be45 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -928,6 +928,10 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_DEFAULT: ret = intel_guc_to_host_process_recv_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + ret = intel_guc_deregister_done_process_msg(guc, payload, + len); + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 53b4a5eb4a85..463613a414d2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -13,7 +13,9 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_lrc.h" +#include "gt/intel_lrc_reg.h" #include "gt/intel_mocs.h" #include "gt/intel_ring.h" @@ -85,6 +87,72 @@ static inline void clr_context_enabled(struct intel_context *ce) &ce->guc_sched_state_no_lock); } +/* + * Below is a set of functions which control the GuC scheduling state which + * require a lock, aside from the special case where the functions are called + * from guc_lrc_desc_pin(). In that case it isn't possible for any other code + * path to be executing on the context. 
+ */ +#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) +#define SCHED_STATE_DESTROYED BIT(1) +static inline void init_sched_state(struct intel_context *ce) +{ + /* Only should be called from guc_lrc_desc_pin() */ + atomic_set(&ce->guc_sched_state_no_lock, 0); + ce->guc_state.sched_state = 0; +} + +static inline bool +context_wait_for_deregister_to_register(struct intel_context *ce) +{ + return ce->guc_state.sched_state & + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} + +static inline void +set_context_wait_for_deregister_to_register(struct intel_context *ce) +{ + /* Only should be called from guc_lrc_desc_pin() */ + ce->guc_state.sched_state |= + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} + +static inline void +clr_context_wait_for_deregister_to_register(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= + ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} + +static inline bool +context_destroyed(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; +} + +static inline void +set_context_destroyed(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; +} + +static inline bool context_guc_id_invalid(struct intel_context *ce) +{ + return ce->guc_id == GUC_INVALID_LRC_ID; +} + +static inline void set_context_guc_id_invalid(struct intel_context *ce) +{ + ce->guc_id = GUC_INVALID_LRC_ID; +} + +static inline struct intel_guc *ce_to_guc(struct intel_context *ce) +{ + return &ce->engine->gt->uc.guc; +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -155,6 +223,9 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) int len = 0; bool enabled = context_enabled(ce); + GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); + if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; action[len++] = ce->guc_id; @@ -417,6 +488,10 @@ int intel_guc_submission_init(struct intel_guc *guc) xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); + spin_lock_init(&guc->contexts_lock); + INIT_LIST_HEAD(&guc->guc_id_list); + ida_init(&guc->guc_ids); + return 0; } @@ -429,9 +504,308 @@ void intel_guc_submission_fini(struct intel_guc *guc) i915_sched_engine_put(guc->sched_engine); } -static int guc_context_alloc(struct intel_context *ce) +static inline void queue_request(struct i915_sched_engine *sched_engine, + struct i915_request *rq, + int prio) { - return lrc_alloc(ce, ce->engine); + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(sched_engine, prio)); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + +static int guc_bypass_tasklet_submit(struct intel_guc *guc, + struct i915_request *rq) +{ + int ret; + + __i915_request_submit(rq); + + trace_i915_request_in(rq, 0); + + guc_set_lrc_tail(rq); + ret = guc_add_request(guc, rq); + if (ret == -EBUSY) + guc->stalled_request = rq; + + return ret; +} + +static void guc_submit_request(struct i915_request *rq) +{ + struct i915_sched_engine *sched_engine = rq->engine->sched_engine; + struct intel_guc *guc = &rq->engine->gt->uc.guc; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. 
*/ + spin_lock_irqsave(&sched_engine->lock, flags); + + if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine)) + queue_request(sched_engine, rq, rq_prio(rq)); + else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) + tasklet_hi_schedule(&sched_engine->tasklet); + + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static int new_guc_id(struct intel_guc *guc) +{ + return ida_simple_get(&guc->guc_ids, 0, + GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); +} + +static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + if (!context_guc_id_invalid(ce)) { + ida_simple_remove(&guc->guc_ids, ce->guc_id); + reset_lrc_desc(guc, ce->guc_id); + set_context_guc_id_invalid(ce); + } + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); +} + +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + unsigned long flags; + + spin_lock_irqsave(&guc->contexts_lock, flags); + __release_guc_id(guc, ce); + spin_unlock_irqrestore(&guc->contexts_lock, flags); +} + +static int steal_guc_id(struct intel_guc *guc) +{ + struct intel_context *ce; + int guc_id; + + lockdep_assert_held(&guc->contexts_lock); + + if (!list_empty(&guc->guc_id_list)) { + ce = list_first_entry(&guc->guc_id_list, + struct intel_context, + guc_id_link); + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); + + list_del_init(&ce->guc_id_link); + guc_id = ce->guc_id; + set_context_guc_id_invalid(ce); + return guc_id; + } else { + return -EAGAIN; + } +} + +static int assign_guc_id(struct intel_guc *guc, u16 *out) +{ + int ret; + + lockdep_assert_held(&guc->contexts_lock); + + ret = new_guc_id(guc); + if (unlikely(ret < 0)) { + ret = steal_guc_id(guc); + if (ret < 0) + return ret; + } + + *out = ret; + return 0; +} + +#define PIN_GUC_ID_TRIES 4 +static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + int ret = 0; + unsigned long flags, tries = PIN_GUC_ID_TRIES; + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + +try_again: + spin_lock_irqsave(&guc->contexts_lock, flags); + + if (context_guc_id_invalid(ce)) { + ret = assign_guc_id(guc, &ce->guc_id); + if (ret) + goto out_unlock; + ret = 1; /* Indidcates newly assigned guc_id */ + } + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); + atomic_inc(&ce->guc_id_ref); + +out_unlock: + spin_unlock_irqrestore(&guc->contexts_lock, flags); + + /* + * -EAGAIN indicates no guc_ids are available, let's retire any + * outstanding requests to see if that frees up a guc_id. If the first + * retire didn't help, insert a sleep with the timeslice duration before + * attempting to retire more requests. Double the sleep period each + * subsequent pass before finally giving up. The sleep period has max of + * 100ms and minimum of 1ms. 
+ */ + if (ret == -EAGAIN && --tries) { + if (PIN_GUC_ID_TRIES - tries > 1) { + unsigned int timeslice_shifted = + ce->engine->props.timeslice_duration_ms << + (PIN_GUC_ID_TRIES - tries - 2); + unsigned int max = min_t(unsigned int, 100, + timeslice_shifted); + + msleep(max_t(unsigned int, max, 1)); + } + intel_gt_retire_requests(guc_to_gt(guc)); + goto try_again; + } + + return ret; +} + +static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + unsigned long flags; + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + + if (unlikely(context_guc_id_invalid(ce))) + return; + + spin_lock_irqsave(&guc->contexts_lock, flags); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && + !atomic_read(&ce->guc_id_ref)) + list_add_tail(&ce->guc_id_link, &guc->guc_id_list); + spin_unlock_irqrestore(&guc->contexts_lock, flags); +} + +static int __guc_action_register_context(struct intel_guc *guc, + u32 guc_id, + u32 offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); +} + +static int register_context(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + + ce->guc_id * sizeof(struct guc_lrc_desc); + + return __guc_action_register_context(guc, ce->guc_id, offset); +} + +static int __guc_action_deregister_context(struct intel_guc *guc, + u32 guc_id) +{ + u32 action[] = { + INTEL_GUC_ACTION_DEREGISTER_CONTEXT, + guc_id, + }; + + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); +} + +static int deregister_context(struct intel_context *ce, u32 guc_id) +{ + struct intel_guc *guc = ce_to_guc(ce); + + return __guc_action_deregister_context(guc, guc_id); +} + +static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) +{ + switch (class) { + case RENDER_CLASS: + return mask >> RCS0; + case VIDEO_ENHANCEMENT_CLASS: + return mask >> VECS0; + case VIDEO_DECODE_CLASS: + return mask >> VCS0; + case COPY_ENGINE_CLASS: + return mask >> BCS0; + default: + MISSING_CASE(class); + return 0; + } +} + +static void guc_context_policy_init(struct intel_engine_cs *engine, + struct guc_lrc_desc *desc) +{ + desc->policy_flags = 0; + + desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US; + desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US; +} + +static int guc_lrc_desc_pin(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 desc_idx = ce->guc_id; + struct guc_lrc_desc *desc; + bool context_registered; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are is same region as write barrier is done + * based on CT vma region. 
+ */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + context_registered = lrc_desc_registered(guc, desc_idx); + + reset_lrc_desc(guc, desc_idx); + set_lrc_desc_registered(guc, desc_idx, ce); + + desc = __get_lrc_desc(guc, desc_idx); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = adjust_engine_mask(engine->class, + engine->mask); + desc->hw_context_desc = ce->lrc.lrca; + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init(engine, desc); + init_sched_state(ce); + + /* + * The context_lookup xarray is used to determine if the hardware + * context is currently registered. There are two cases in which it + * could be registered either the guc_id has been stolen from another + * context or the lrc descriptor address of this context has changed. In + * either case the context needs to be deregistered with the GuC before + * registering this context. + */ + if (context_registered) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + + /* + * If stealing the guc_id, this ce has the same guc_id as the + * context whose guc_id was stolen. + */ + with_intel_runtime_pm(runtime_pm, wakeref) + ret = deregister_context(ce, ce->guc_id); + } else { + with_intel_runtime_pm(runtime_pm, wakeref) + ret = register_context(ce); + } + + return ret; } static int guc_context_pre_pin(struct intel_context *ce, @@ -443,36 +817,144 @@ static int guc_context_pre_pin(struct intel_context *ce, static int guc_context_pin(struct intel_context *ce, void *vaddr) { + if (i915_ggtt_offset(ce->state) != + (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) + set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); + + /* + * GuC context gets pinned in guc_request_alloc. See that function for + * explaination of why. + */ + return lrc_pin(ce, ce->engine, vaddr); } +static void guc_context_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + unpin_guc_id(guc, ce); + lrc_unpin(ce); +} + +static void guc_context_post_unpin(struct intel_context *ce) +{ + lrc_post_unpin(ce); +} + +static inline void guc_lrc_desc_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + set_context_destroyed(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + deregister_context(ce, ce->guc_id); +} + +static void guc_context_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + struct intel_guc *guc = ce_to_guc(ce); + intel_wakeref_t wakeref; + unsigned long flags; + + /* + * If the guc_id is invalid this context has been stolen and we can free + * it immediately. Also can be freed immediately if the context is not + * registered with the GuC. + */ + if (context_guc_id_invalid(ce)) { + lrc_destroy(kref); + return; + } else if (!lrc_desc_registered(guc, ce->guc_id)) { + release_guc_id(guc, ce); + lrc_destroy(kref); + return; + } + + /* + * We have to acquire the context spinlock and check guc_id again, if it + * is valid it hasn't been stolen and needs to be deregistered. We + * delete this context from the list of unpinned guc_ids available to + * steal to seal a race with guc_lrc_desc_pin(). 
When the G2H CTB + * returns indicating this context has been deregistered the guc_id is + * returned to the pool of available guc_ids. + */ + spin_lock_irqsave(&guc->contexts_lock, flags); + if (context_guc_id_invalid(ce)) { + spin_unlock_irqrestore(&guc->contexts_lock, flags); + lrc_destroy(kref); + return; + } + + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); + spin_unlock_irqrestore(&guc->contexts_lock, flags); + + /* + * We defer GuC context deregistration until the context is destroyed + * in order to save on CTBs. With this optimization ideally we only need + * 1 CTB to register the context during the first pin and 1 CTB to + * deregister the context when the context is destroyed. Without this + * optimization, a CTB would be needed every pin & unpin. + * + * XXX: Need to acqiure the runtime wakeref as this can be triggered + * from context_free_worker when runtime wakeref is not held. + * guc_lrc_desc_unpin requires the runtime as a GuC register is written + * in H2G CTB to deregister the context. A future patch may defer this + * H2G CTB if the runtime wakeref is zero. + */ + with_intel_runtime_pm(runtime_pm, wakeref) + guc_lrc_desc_unpin(ce); +} + +static int guc_context_alloc(struct intel_context *ce) +{ + return lrc_alloc(ce, ce->engine); +} + static const struct intel_context_ops guc_context_ops = { .alloc = guc_context_alloc, .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, - .unpin = lrc_unpin, - .post_unpin = lrc_post_unpin, + .unpin = guc_context_unpin, + .post_unpin = guc_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, .reset = lrc_reset, - .destroy = lrc_destroy, + .destroy = guc_context_destroy, }; -static int guc_request_alloc(struct i915_request *request) +static bool context_needs_register(struct intel_context *ce, bool new_guc_id) +{ + return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id); +} + +static int guc_request_alloc(struct i915_request *rq) { + struct intel_context *ce = rq->context; + struct intel_guc *guc = ce_to_guc(ce); int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->context)); + GEM_BUG_ON(!intel_context_is_pinned(rq->context)); /* * Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. */ - request->reserved_space += GUC_REQUEST_SIZE; + rq->reserved_space += GUC_REQUEST_SIZE; /* * Note that after this point, we have committed to using @@ -483,56 +965,47 @@ static int guc_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. 
*/ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) return ret; - request->reserved_space -= GUC_REQUEST_SIZE; - return 0; -} + rq->reserved_space -= GUC_REQUEST_SIZE; -static inline void queue_request(struct i915_sched_engine *sched_engine, - struct i915_request *rq, - int prio) -{ - GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(sched_engine, prio)); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); -} - -static int guc_bypass_tasklet_submit(struct intel_guc *guc, - struct i915_request *rq) -{ - int ret; - - __i915_request_submit(rq); - - trace_i915_request_in(rq, 0); - - guc_set_lrc_tail(rq); - ret = guc_add_request(guc, rq); - if (ret == -EBUSY) - guc->stalled_request = rq; - - return ret; -} - -static void guc_submit_request(struct i915_request *rq) -{ - struct i915_sched_engine *sched_engine = rq->engine->sched_engine; - struct intel_guc *guc = &rq->engine->gt->uc.guc; - unsigned long flags; + /* + * Call pin_guc_id here rather than in the pinning step as with + * dma_resv, contexts can be repeatedly pinned / unpinned trashing the + * guc_ids and creating horrible race conditions. This is especially bad + * when guc_ids are being stolen due to over subscription. By the time + * this function is reached, it is guaranteed that the guc_id will be + * persistent until the generated request is retired. Thus, sealing these + * race conditions. It is still safe to fail here if guc_ids are + * exhausted and return -EAGAIN to the user indicating that they can try + * again in the future. + * + * There is no need for a lock here as the timeline mutex ensures at + * most one context can be executing this code path at once. The + * guc_id_ref is incremented once for every request in flight and + * decremented on each retire. When it is zero, a lock around the + * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. + */ + if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + return 0; - /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&sched_engine->lock, flags); + ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ + if (unlikely(ret < 0)) + return ret; + if (context_needs_register(ce, !!ret)) { + ret = guc_lrc_desc_pin(ce); + if (unlikely(ret)) { /* unwind */ + atomic_dec(&ce->guc_id_ref); + unpin_guc_id(guc, ce); + return ret; + } + } - if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine)) - queue_request(sched_engine, rq, rq_prio(rq)); - else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) - tasklet_hi_schedule(&sched_engine->tasklet); + clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); - spin_unlock_irqrestore(&sched_engine->lock, flags); + return 0; } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -606,6 +1079,41 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = guc_submit_request; } +static inline void guc_kernel_context_pin(struct intel_guc *guc, + struct intel_context *ce) +{ + if (context_guc_id_invalid(ce)) + pin_guc_id(guc, ce); + guc_lrc_desc_pin(ce); +} + +static inline void guc_init_lrc_mapping(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* make sure all descriptors are clean... 
*/ + xa_destroy(&guc->context_lookup); + + /* + * Some contexts might have been pinned before we enabled GuC + * submission, so we need to add them to the GuC bookeeping. + * Also, after a reset the of the GuC we want to make sure that the + * information shared with GuC is properly reset. The kernel LRCs are + * not attached to the gem_context, so they need to be added separately. + * + * Note: we purposefully do not check the return of guc_lrc_desc_pin, + * because that function can only fail if a reset is just starting. This + * is at the end of reset so presumably another reset isn't happening + * and even it did this code would be run again. + */ + + for_each_engine(engine, gt, id) + if (engine->kernel_context) + guc_kernel_context_pin(guc, engine->kernel_context); +} + static void guc_release(struct intel_engine_cs *engine) { engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ @@ -718,6 +1226,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { + guc_init_lrc_mapping(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -743,3 +1252,62 @@ void intel_guc_submission_init_early(struct intel_guc *guc) { guc->submission_selected = __guc_submission_selected(guc); } + +static inline struct intel_context * +g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) +{ + struct intel_context *ce; + + if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid desc_idx %u", desc_idx); + return NULL; + } + + ce = __get_context(guc, desc_idx); + if (unlikely(!ce)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Context is NULL, desc_idx %u", desc_idx); + return NULL; + } + + return ce; +} + +int intel_guc_deregister_done_process_msg(struct intel_guc *guc, + const u32 *msg, + u32 len) +{ + struct intel_context *ce; + u32 desc_idx = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + if (context_wait_for_deregister_to_register(ce)) { + struct intel_runtime_pm *runtime_pm = + &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + + /* + * Previous owner of this guc_id has been deregistered, now safe + * register this context. 
+ */ + with_intel_runtime_pm(runtime_pm, wakeref) + register_context(ce); + clr_context_wait_for_deregister_to_register(ce); + intel_context_put(ce); + } else if (context_destroyed(ce)) { + /* Context has been destroyed */ + release_guc_id(guc, ce); + lrc_destroy(&ce->ref); + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b956e3a63ca9..1022400dd1bd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4142,6 +4142,7 @@ enum { FAULT_AND_CONTINUE /* Unsupported */ }; +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) #define GEN8_CTX_VALID (1 << 0) #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) #define GEN8_CTX_FORCE_RESTORE (1 << 2) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d3de9f60e03a..d35d7c96839d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -407,6 +407,7 @@ bool i915_request_retire(struct i915_request *rq) */ if (!list_empty(&rq->sched.link)) remove_from_engine(rq); + atomic_dec(&rq->context->guc_id_ref); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ -- cgit v1.2.3 From b208f2d51b468f3289208e9d8d3cbbf76e84e426 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:50 -0700 Subject: drm/i915/guc: Insert fence on context when deregistering Sometimes during context pinning a context with the same guc_id is registered with the GuC. In this a case deregister must be done before the context can be registered. A fence is inserted on all requests while the deregister is in flight. Once the G2H is received indicating the deregistration is complete the context is registered and the fence is released. v2: (John H) - Fix commit message Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-8-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 5 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 51 ++++++++++++++++++++++- drivers/gpu/drm/i915/i915_request.h | 8 ++++ 4 files changed, 63 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 090c13287d3e..7c9159517f8a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -385,6 +385,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) mutex_init(&ce->pin_mutex); spin_lock_init(&ce->guc_state.lock); + INIT_LIST_HEAD(&ce->guc_state.fences); ce->guc_id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id_link); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 606c480aec26..e0e3a937f709 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -147,6 +147,11 @@ struct intel_context { * submission */ u8 sched_state; + /* + * fences: maintains of list of requests that have a submit + * fence related to GuC submission + */ + struct list_head fences; } guc_state; /* GuC scheduling state flags that do not require a lock. 
*/ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 463613a414d2..a0871b800153 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -935,6 +935,30 @@ static const struct intel_context_ops guc_context_ops = { .destroy = guc_context_destroy, }; +static void __guc_signal_context_fence(struct intel_context *ce) +{ + struct i915_request *rq; + + lockdep_assert_held(&ce->guc_state.lock); + + list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) + i915_sw_fence_complete(&rq->submit); + + INIT_LIST_HEAD(&ce->guc_state.fences); +} + +static void guc_signal_context_fence(struct intel_context *ce) +{ + unsigned long flags; + + GEM_BUG_ON(!context_wait_for_deregister_to_register(ce)); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + clr_context_wait_for_deregister_to_register(ce); + __guc_signal_context_fence(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); +} + static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || @@ -945,6 +969,7 @@ static int guc_request_alloc(struct i915_request *rq) { struct intel_context *ce = rq->context; struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; int ret; GEM_BUG_ON(!intel_context_is_pinned(rq->context)); @@ -989,7 +1014,7 @@ static int guc_request_alloc(struct i915_request *rq) * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. */ if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) - return 0; + goto out; ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ if (unlikely(ret < 0)) @@ -1005,6 +1030,28 @@ static int guc_request_alloc(struct i915_request *rq) clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); +out: + /* + * We block all requests on this context if a G2H is pending for a + * context deregistration as the GuC will fail a context registration + * while this G2H is pending. Once a G2H returns, the fence is released + * that is blocking these requests (see guc_signal_context_fence). + * + * We can safely check the below field outside of the lock as it isn't + * possible for this field to transition from being clear to set but + * converse is possible, hence the need for the check within the lock. + */ + if (likely(!context_wait_for_deregister_to_register(ce))) + return 0; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (context_wait_for_deregister_to_register(ce)) { + i915_sw_fence_await(&rq->submit); + + list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return 0; } @@ -1301,7 +1348,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, */ with_intel_runtime_pm(runtime_pm, wakeref) register_context(ce); - clr_context_wait_for_deregister_to_register(ce); + guc_signal_context_fence(ce); intel_context_put(ce); } else if (context_destroyed(ce)) { /* Context has been destroyed */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 5deb65ec5fa5..717e5b292046 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -285,6 +285,14 @@ struct i915_request { struct hrtimer timer; } watchdog; + /* + * Requests may need to be stalled when using GuC submission waiting for + * certain GuC operations to complete. If that is the case, stalled + * requests are added to a per context list of stalled requests. 
The + * below list_head is the link in that list. + */ + struct list_head guc_fence_link; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; -- cgit v1.2.3 From e0717063ccb446c820317b3376c5dfafb3624502 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:51 -0700 Subject: drm/i915/guc: Defer context unpin until scheduling is disabled With GuC scheduling, it isn't safe to unpin a context while scheduling is enabled for that context as the GuC may touch some of the pinned state (e.g. LRC). To ensure scheduling isn't enabled when an unpin is done, a call back is added to intel_context_unpin when pin count == 1 to disable scheduling for that context. When the response CTB is received it is safe to do the final unpin. Future patches may add a heuristic / delay to schedule the disable call back to avoid thrashing on schedule enable / disable. v2: (John H) - s/drm_dbg/drm_err (Daneiel) - Clean up sched state function Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-9-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_context.h | 27 +++- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 3 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +++++++++++++++++++++- 6 files changed, 180 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 7c9159517f8a..dca25b6569aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -306,9 +306,9 @@ retry: return err; } -void intel_context_unpin(struct intel_context *ce) +void __intel_context_do_unpin(struct intel_context *ce, int sub) { - if (!atomic_dec_and_test(&ce->pin_count)) + if (!atomic_sub_and_test(sub, &ce->pin_count)) return; CE_TRACE(ce, "unpin\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index b10cbe8fee99..974ef85320c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -113,7 +113,32 @@ static inline void __intel_context_pin(struct intel_context *ce) atomic_inc(&ce->pin_count); } -void intel_context_unpin(struct intel_context *ce); +void __intel_context_do_unpin(struct intel_context *ce, int sub); + +static inline void intel_context_sched_disable_unpin(struct intel_context *ce) +{ + __intel_context_do_unpin(ce, 2); +} + +static inline void intel_context_unpin(struct intel_context *ce) +{ + if (!ce->ops->sched_disable) { + __intel_context_do_unpin(ce, 1); + } else { + /* + * Move ownership of this pin to the scheduling disable which is + * an async operation. When that operation completes the above + * intel_context_sched_disable_unpin is called potentially + * unpinning the context. 
+ */ + while (!atomic_add_unless(&ce->pin_count, -1, 1)) { + if (atomic_cmpxchg(&ce->pin_count, 1, 2) == 1) { + ce->ops->sched_disable(ce); + break; + } + } + } +} void intel_context_enter_engine(struct intel_context *ce); void intel_context_exit_engine(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e0e3a937f709..4a5518d295c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -43,6 +43,8 @@ struct intel_context_ops { void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); + void (*sched_disable)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 7fd6c3e343e4..4d470ebeda95 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -248,6 +248,8 @@ int intel_guc_reset_engine(struct intel_guc *guc, int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_sched_done_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 28ff82c5be45..019b25ff1888 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -932,6 +932,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r ret = intel_guc_deregister_done_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + ret = intel_guc_sched_done_process_msg(guc, payload, len); + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a0871b800153..2f393d9dba0d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -70,6 +70,7 @@ * possible for some of the bits to changing at the same time though. 
*/ #define SCHED_STATE_NO_LOCK_ENABLED BIT(0) +#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) static inline bool context_enabled(struct intel_context *ce) { return (atomic_read(&ce->guc_sched_state_no_lock) & @@ -87,6 +88,24 @@ static inline void clr_context_enabled(struct intel_context *ce) &ce->guc_sched_state_no_lock); } +static inline bool context_pending_enable(struct intel_context *ce) +{ + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_PENDING_ENABLE); +} + +static inline void set_context_pending_enable(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, + &ce->guc_sched_state_no_lock); +} + /* * Below is a set of functions which control the GuC scheduling state which * require a lock, aside from the special case where the functions are called @@ -95,6 +114,7 @@ static inline void clr_context_enabled(struct intel_context *ce) */ #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) #define SCHED_STATE_DESTROYED BIT(1) +#define SCHED_STATE_PENDING_DISABLE BIT(2) static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ @@ -138,6 +158,23 @@ set_context_destroyed(struct intel_context *ce) ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; } +static inline bool context_pending_disable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; +} + +static inline void set_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; +} + +static inline void clr_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id == GUC_INVALID_LRC_ID; @@ -230,6 +267,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; action[len++] = ce->guc_id; action[len++] = GUC_CONTEXT_ENABLE; + set_context_pending_enable(ce); + intel_context_get(ce); } else { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; action[len++] = ce->guc_id; @@ -237,8 +276,12 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) err = intel_guc_send_nb(guc, action, len); - if (!enabled && !err) + if (!enabled && !err) { set_context_enabled(ce); + } else if (!enabled) { + clr_context_pending_enable(ce); + intel_context_put(ce); + } return err; } @@ -842,6 +885,62 @@ static void guc_context_post_unpin(struct intel_context *ce) lrc_post_unpin(ce); } +static void __guc_context_sched_disable(struct intel_guc *guc, + struct intel_context *ce, + u16 guc_id) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, + guc_id, /* ce->guc_id not stable */ + GUC_CONTEXT_DISABLE + }; + + GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + + intel_context_get(ce); + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); +} + +static u16 prep_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + set_context_pending_disable(ce); + clr_context_enabled(ce); + + return ce->guc_id; +} + +static void guc_context_sched_disable(struct intel_context *ce) +{ + struct intel_guc *guc = 
ce_to_guc(ce); + struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; + unsigned long flags; + u16 guc_id; + intel_wakeref_t wakeref; + + if (context_guc_id_invalid(ce) || + !lrc_desc_registered(guc, ce->guc_id)) { + clr_context_enabled(ce); + goto unpin; + } + + if (!context_enabled(ce)) + goto unpin; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + guc_id = prep_context_pending_disable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return; +unpin: + intel_context_sched_disable_unpin(ce); +} + static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); @@ -849,6 +948,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + GEM_BUG_ON(context_enabled(ce)); spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_destroyed(ce); @@ -931,6 +1031,8 @@ static const struct intel_context_ops guc_context_ops = { .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, + .sched_disable = guc_context_sched_disable, + .reset = lrc_reset, .destroy = guc_context_destroy, }; @@ -1358,3 +1460,45 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, return 0; } + +int intel_guc_sched_done_process_msg(struct intel_guc *guc, + const u32 *msg, + u32 len) +{ + struct intel_context *ce; + unsigned long flags; + u32 desc_idx = msg[0]; + + if (unlikely(len < 2)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + if (unlikely(context_destroyed(ce) || + (!context_pending_enable(ce) && + !context_pending_disable(ce)))) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Bad context sched_state 0x%x, 0x%x, desc_idx %u", + atomic_read(&ce->guc_sched_state_no_lock), + ce->guc_state.sched_state, desc_idx); + return -EPROTO; + } + + if (context_pending_enable(ce)) { + clr_context_pending_enable(ce); + } else if (context_pending_disable(ce)) { + intel_context_sched_disable_unpin(ce); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + clr_context_pending_disable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } + + intel_context_put(ce); + + return 0; +} -- cgit v1.2.3 From b8b183abca5108ad67b0b9e23b6d407347bc4aa8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:52 -0700 Subject: drm/i915/guc: Disable engine barriers with GuC during unpin Disable engine barriers for unpinning with GuC. This feature isn't needed with the GuC as it disables context scheduling before unpinning which guarantees the HW will not reference the context. Hence it is not necessary to defer unpinning until a kernel context request completes on each engine in the context engine mask. 
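For illustration, the distinction above can be modelled in a small standalone C sketch; the struct, field and function names here are simplified stand-ins for the real i915 types, not the actual driver code:

    #include <stdbool.h>
    #include <stdio.h>

    struct ctx {
            bool is_barrier_ctx;   /* kernel/barrier context */
            bool engine_uses_guc;  /* submission backend of this context's engine */
    };

    /* Preallocating idle-barrier tracking nodes is only needed on the
     * execlists path: with GuC submission the context is unpinned only
     * after the sched-disable handshake completes, so the HW is already
     * guaranteed to be done with the context image. */
    static int context_active_acquire(struct ctx *c)
    {
            if (c->is_barrier_ctx || c->engine_uses_guc)
                    return 0;

            printf("preallocate idle-barrier nodes for each engine\n");
            return 0;
    }

    int main(void)
    {
            struct ctx guc_ctx = { .engine_uses_guc = true };
            struct ctx elsp_ctx = { 0 };

            context_active_acquire(&guc_ctx);   /* skips barrier preallocation */
            context_active_acquire(&elsp_ctx);  /* preallocates on execlists */
            return 0;
    }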
Cc: John Harrison Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-10-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 2 +- drivers/gpu/drm/i915/gt/selftest_context.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index dca25b6569aa..cba2521d26f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -80,7 +80,7 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) return 0; /* Preallocate tracking nodes */ diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 26685b927169..fa7b99a671dd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -209,7 +209,13 @@ static int __live_active_context(struct intel_engine_cs *engine) * This test makes sure that the context is kept alive until a * subsequent idle-barrier (emitted when the engine wakeref hits 0 * with no more outstanding requests). + * + * In GuC submission mode we don't use idle barriers and we instead + * get a message from the GuC to signal that it is safe to unpin the + * context from memory. */ + if (intel_engine_uses_guc(engine)) + return 0; if (intel_engine_pm_is_awake(engine)) { pr_err("%s is awake before starting %s!\n", @@ -357,7 +363,11 @@ static int __live_remote_context(struct intel_engine_cs *engine) * on the context image remotely (intel_context_prepare_remote_request), * which inserts foreign fences into intel_context.active, does not * clobber the idle-barrier. + * + * In GuC submission mode we don't use idle barriers. */ + if (intel_engine_uses_guc(engine)) + return 0; if (intel_engine_pm_is_awake(engine)) { pr_err("%s is awake before starting %s!\n", -- cgit v1.2.3 From 1f5cdb06b1d3ea6238c807acd91e5ec59f1098d3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:53 -0700 Subject: drm/i915/guc: Extend deregistration fence to schedule disable Extend the deregistration context fence to fence whne a GuC context has scheduling disable pending. v2: (John H) - Update comment why we check the pin count within spin lock Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-11-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 40 +++++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 2f393d9dba0d..fc0b36ab1e68 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -930,7 +930,22 @@ static void guc_context_sched_disable(struct intel_context *ce) goto unpin; spin_lock_irqsave(&ce->guc_state.lock, flags); + + /* + * We have to check if the context has been pinned again as another pin + * operation is allowed to pass this function. 
Checking the pin count, + * within ce->guc_state.lock, synchronizes this function with + * guc_request_alloc ensuring a request doesn't slip through the + * 'context_pending_disable' fence. Checking within the spin lock (can't + * sleep) ensures another process doesn't pin this context and generate + * a request before we set the 'context_pending_disable' flag here. + */ + if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return; + } guc_id = prep_context_pending_disable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); with_intel_runtime_pm(runtime_pm, wakeref) @@ -1135,19 +1150,22 @@ static int guc_request_alloc(struct i915_request *rq) out: /* * We block all requests on this context if a G2H is pending for a - * context deregistration as the GuC will fail a context registration - * while this G2H is pending. Once a G2H returns, the fence is released - * that is blocking these requests (see guc_signal_context_fence). + * schedule disable or context deregistration as the GuC will fail a + * schedule enable or context registration if either G2H is pending + * respectfully. Once a G2H returns, the fence is released that is + * blocking these requests (see guc_signal_context_fence). * - * We can safely check the below field outside of the lock as it isn't - * possible for this field to transition from being clear to set but + * We can safely check the below fields outside of the lock as it isn't + * possible for these fields to transition from being clear to set but * converse is possible, hence the need for the check within the lock. */ - if (likely(!context_wait_for_deregister_to_register(ce))) + if (likely(!context_wait_for_deregister_to_register(ce) && + !context_pending_disable(ce))) return 0; spin_lock_irqsave(&ce->guc_state.lock, flags); - if (context_wait_for_deregister_to_register(ce)) { + if (context_wait_for_deregister_to_register(ce) || + context_pending_disable(ce)) { i915_sw_fence_await(&rq->submit); list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); @@ -1491,10 +1509,18 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, if (context_pending_enable(ce)) { clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { + /* + * Unpin must be done before __guc_signal_context_fence, + * otherwise a race exists between the requests getting + * submitted + retired before this unpin completes resulting in + * the pin_count going to zero and the context still being + * enabled. + */ intel_context_sched_disable_unpin(ce); spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_pending_disable(ce); + __guc_signal_context_fence(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); } -- cgit v1.2.3 From e6cb8dc93f346263eec8be75997d4bc3bfb17591 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:54 -0700 Subject: drm/i915: Disable preempt busywait when using GuC scheduling Disable preempt busywait when using GuC scheduling. This isn't needed as the GuC controls preemption when scheduling. 
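A minimal sketch of the breadcrumb-tail decision follows; the command tokens are placeholders rather than the real MI_* encodings, and the helper name is invented for the example:

    #include <stdbool.h>
    #include <stdio.h>

    /* Placeholder tokens, not the real MI_* command encodings. */
    enum cmd { USER_INTERRUPT, ARB_ENABLE, PREEMPT_BUSYWAIT };

    static enum cmd *emit_fini_breadcrumb_tail(enum cmd *cs, bool has_semaphores,
                                               bool uses_guc_submission)
    {
            *cs++ = USER_INTERRUPT;
            *cs++ = ARB_ENABLE;

            /* With GuC submission the firmware arbitrates preemption, so the
             * CS-side busywait poll is only emitted on the execlists path. */
            if (has_semaphores && !uses_guc_submission)
                    *cs++ = PREEMPT_BUSYWAIT;

            return cs;
    }

    int main(void)
    {
            enum cmd ring[4];
            enum cmd *end = emit_fini_breadcrumb_tail(ring, true, true);

            printf("emitted %td dwords\n", end - ring);
            return 0;
    }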
v2: (John H): - Fix commit message Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-12-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index a69f5c438c72..b29eb9fd0009 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -506,7 +506,8 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); @@ -598,7 +599,8 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = gen12_emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); -- cgit v1.2.3 From 38d5ec43063c5908d1cda4e7eb24330405ccdb6f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:55 -0700 Subject: drm/i915/guc: Ensure request ordering via completion fences If two requests are on the same ring, they are explicitly ordered by the HW. So, a submission fence is sufficient to ensure ordering when using the new GuC submission interface. Conversely, if two requests share a timeline and are on the same physical engine but different context this doesn't ensure ordering on the new GuC submission interface. So, a completion fence needs to be used to ensure ordering. 
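The fence selection can be sketched in isolation as below; the request layout is heavily simplified and the helper name submit_fence_suffices() is invented for the example, but the branch mirrors the ordering rule described above:

    #include <stdbool.h>
    #include <stdio.h>

    struct request {
            unsigned engine_mask;  /* physical engines this request may run on */
            int context_id;        /* software context (ring) it belongs to */
    };

    static bool is_power_of_2(unsigned x)
    {
            return x && !(x & (x - 1));
    }

    /* Returns true if ordering against @prev only needs its submit fence,
     * false if the new request must wait for @prev to fully complete. */
    static bool submit_fence_suffices(const struct request *prev,
                                      const struct request *rq,
                                      bool uses_guc)
    {
            if (!uses_guc)
                    /* Execlists: a single shared physical engine keeps order. */
                    return is_power_of_2(prev->engine_mask | rq->engine_mask);

            /* GuC: only requests on the same ring are ordered by the HW,
             * otherwise a completion fence is required. */
            return prev->context_id == rq->context_id;
    }

    int main(void)
    {
            struct request prev = { .engine_mask = 0x1, .context_id = 1 };
            struct request next = { .engine_mask = 0x1, .context_id = 2 };

            printf("submit fence ok (GuC): %d\n",
                   submit_fence_suffices(&prev, &next, true));   /* 0: completion fence */
            printf("submit fence ok (execlists): %d\n",
                   submit_fence_suffices(&prev, &next, false));  /* 1: submit fence */
            return 0;
    }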
v2: (Daniele) - Don't delete spin lock v3: (Daniele) - Delete forward dec Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-13-matthew.brost@intel.com --- drivers/gpu/drm/i915/i915_request.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d35d7c96839d..6594cb2f8ebd 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -432,6 +432,7 @@ void i915_request_retire_upto(struct i915_request *rq) do { tmp = list_first_entry(&tl->requests, typeof(*tmp), link); + GEM_BUG_ON(!i915_request_completed(tmp)); } while (i915_request_retire(tmp) && tmp != rq); } @@ -1463,7 +1464,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) + if (!intel_engine_uses_guc(to->engine) && + is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) ret = await_request_submit(to, from); else ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); @@ -1622,6 +1624,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); if (prev && !__i915_request_is_complete(prev)) { + bool uses_guc = intel_engine_uses_guc(rq->engine); + /* * The requests are supposed to be kept in order. However, * we need to be wary in case the timeline->last_request @@ -1632,7 +1636,9 @@ __i915_request_add_to_timeline(struct i915_request *rq) i915_seqno_passed(prev->fence.seqno, rq->fence.seqno)); - if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) + if ((!uses_guc && + is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) || + (uses_guc && prev->context == rq->context)) i915_sw_fence_await_sw_fence(&rq->submit, &prev->submit, &rq->submitq); -- cgit v1.2.3 From 4dbd3944055507d53708f82e9252190800b95769 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:56 -0700 Subject: drm/i915/guc: Disable semaphores when using GuC scheduling Semaphores are an optimization and not required for basic GuC submission to work properly. Disable until we have time to do the implementation to enable semaphores and tune them for performance. Also long direction is just to delete semaphores from the i915 so another reason to not enable these for GuC submission. This patch fixes an existing bugs where I915_ENGINE_HAS_SEMAPHORES was not honored correctly. 
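A small sketch of the resulting opt-in check; the names and the priority constant are illustrative only:

    #include <stdbool.h>
    #include <stdio.h>

    #define PRIORITY_NORMAL 0

    struct engine_caps {
            bool has_timeslices;
            bool has_semaphores; /* cleared when the GuC backend is used */
    };

    /* Semaphores are a timeslicing optimisation; honouring the engine's
     * semaphore flag means a backend that disables them is respected. */
    static bool context_should_use_semaphores(int prio, const struct engine_caps *e)
    {
            return prio >= PRIORITY_NORMAL && e->has_timeslices && e->has_semaphores;
    }

    int main(void)
    {
            struct engine_caps guc = { .has_timeslices = true, .has_semaphores = false };
            struct engine_caps execlists = { .has_timeslices = true, .has_semaphores = true };

            printf("GuC: %d, execlists: %d\n",
                   context_should_use_semaphores(PRIORITY_NORMAL, &guc),
                   context_should_use_semaphores(PRIORITY_NORMAL, &execlists));
            return 0;
    }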
v2: Reword commit message v3: (John H) - Add text to commit indicating this also fixing an existing bug v4: (John H) - s/bug/bugs Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-14-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index bf2a2319353a..89ca401bf9ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -799,7 +799,8 @@ static int intel_context_set_gem(struct intel_context *ce, } if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_timeslices(ce->engine)) + intel_engine_has_timeslices(ce->engine) && + intel_engine_has_semaphores(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && @@ -1778,7 +1779,8 @@ static void __apply_priority(struct intel_context *ce, void *arg) if (!intel_engine_has_timeslices(ce->engine)) return; - if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); -- cgit v1.2.3 From f4eb1f3fe94683cd7bdbb355d913bacf7e5d205f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:57 -0700 Subject: drm/i915/guc: Ensure G2H response has space in buffer Ensure G2H response has space in the buffer before sending H2G CTB as the GuC can't handle any backpressure on the G2H interface. 
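The credit scheme can be modelled standalone as follows; the buffer size, reserved fraction and struct name are assumptions made for the sketch, and in the real driver the reserve step runs under the CTB send lock rather than relying on the atomic alone:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define G2H_BUFFER_DW   1024
    #define G2H_RESERVED_DW (G2H_BUFFER_DW / 4) /* kept free for unsolicited G2H */

    struct g2h_credits {
            atomic_int space; /* dwords currently available for expected replies */
    };

    static void credits_init(struct g2h_credits *c)
    {
            atomic_init(&c->space, G2H_BUFFER_DW - G2H_RESERVED_DW);
    }

    /* Called before writing an H2G command that will produce a G2H reply. */
    static bool g2h_reserve(struct g2h_credits *c, int len_dw)
    {
            if (!len_dw)
                    return true; /* fire-and-forget H2G, no reply expected */
            if (atomic_load(&c->space) < len_dw)
                    return false; /* caller backs off and retries */
            atomic_fetch_sub(&c->space, len_dw);
            return true;
    }

    /* Called once the matching G2H has been consumed from the receive buffer. */
    static void g2h_release(struct g2h_credits *c, int len_dw)
    {
            atomic_fetch_add(&c->space, len_dw);
    }

    int main(void)
    {
            struct g2h_credits c;

            credits_init(&c);
            if (g2h_reserve(&c, 3))        /* e.g. a SCHED_CONTEXT_MODE_SET reply */
                    printf("sent, space now %d\n", atomic_load(&c.space));
            g2h_release(&c, 3);            /* reply handled in the CT handler */
            printf("space back to %d\n", atomic_load(&c.space));
            return 0;
    }

Reserving the full maximum reply size for blocking sends, as the patch does, follows from not knowing in advance how large the response will be.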
v2: (Matthew) - s/INTEL_GUC_SEND/INTEL_GUC_CT_SEND v3: (Matthew) - Add G2H credit accounting to blocking path, add g2h_release_space helper (John H) - CTB_G2H_BUFFER_SIZE / 4 == G2H_ROOM_BUFFER_SIZE Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-15-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 8 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 94 +++++++++++++++++++---- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 11 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++-- 5 files changed, 104 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4d470ebeda95..451797c62b41 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -96,10 +96,11 @@ inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) } static -inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len) +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len, + u32 g2h_len_dw) { return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, - INTEL_GUC_CT_SEND_NB); + MAKE_SEND_FLAGS(g2h_len_dw)); } static inline int @@ -113,6 +114,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, static inline int intel_guc_send_busy_loop(struct intel_guc *guc, const u32 *action, u32 len, + u32 g2h_len_dw, bool loop) { int err; @@ -130,7 +132,7 @@ static inline int intel_guc_send_busy_loop(struct intel_guc *guc, might_sleep_if(loop && not_atomic); retry: - err = intel_guc_send_nb(guc, action, len); + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); if (unlikely(err == -EBUSY && loop)) { if (likely(not_atomic)) { if (msleep_interruptible(sleep_period_ms)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 019b25ff1888..75f69c28056e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -73,6 +73,7 @@ static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct) #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) #define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) struct ct_request { struct list_head link; @@ -129,23 +130,27 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) { + u32 space; + ctb->broken = false; ctb->tail = 0; ctb->head = 0; - ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size); + space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space; + atomic_set(&ctb->space, space); guc_ct_buffer_desc_init(ctb->desc); } static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, struct guc_ct_buffer_desc *desc, - u32 *cmds, u32 size_in_bytes) + u32 *cmds, u32 size_in_bytes, u32 resv_space) { GEM_BUG_ON(size_in_bytes % 4); ctb->desc = desc; ctb->cmds = cmds; ctb->size = size_in_bytes / 4; + ctb->resv_space = resv_space / 4; guc_ct_buffer_reset(ctb); } @@ -226,6 +231,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) struct guc_ct_buffer_desc *desc; u32 blob_size; u32 cmds_size; + u32 resv_space; void *blob; u32 *cmds; int err; @@ -250,19 +256,23 @@ int 
intel_guc_ct_init(struct intel_guc_ct *ct) desc = blob; cmds = blob + 2 * CTB_DESC_SIZE; cmds_size = CTB_H2G_BUFFER_SIZE; - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send", - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + resv_space = 0; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, + resv_space); - guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size); + guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space); /* store pointers to desc and cmds for recv ctb */ desc = blob + CTB_DESC_SIZE; cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE; cmds_size = CTB_G2H_BUFFER_SIZE; - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "recv", - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + resv_space = G2H_ROOM_BUFFER_SIZE; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, + resv_space); - guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size); + guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space); return 0; } @@ -461,8 +471,8 @@ static int ct_write(struct intel_guc_ct *ct, /* update local copies */ ctb->tail = tail; - GEM_BUG_ON(ctb->space < len + GUC_CTB_HDR_LEN); - ctb->space -= len + GUC_CTB_HDR_LEN; + GEM_BUG_ON(atomic_read(&ctb->space) < len + GUC_CTB_HDR_LEN); + atomic_sub(len + GUC_CTB_HDR_LEN, &ctb->space); /* now update descriptor */ WRITE_ONCE(desc->tail, tail); @@ -537,6 +547,32 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct) return ret; } +static inline bool g2h_has_room(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; + + /* + * We leave a certain amount of space in the G2H CTB buffer for + * unexpected G2H CTBs (e.g. logging, engine hang, etc...) + */ + return !g2h_len_dw || atomic_read(&ctb->space) >= g2h_len_dw; +} + +static inline void g2h_reserve_space(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + lockdep_assert_held(&ct->ctbs.send.lock); + + GEM_BUG_ON(!g2h_has_room(ct, g2h_len_dw)); + + if (g2h_len_dw) + atomic_sub(g2h_len_dw, &ct->ctbs.recv.space); +} + +static inline void g2h_release_space(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + atomic_add(g2h_len_dw, &ct->ctbs.recv.space); +} + static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; @@ -544,7 +580,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) u32 head; u32 space; - if (ctb->space >= len_dw) + if (atomic_read(&ctb->space) >= len_dw) return true; head = READ_ONCE(desc->head); @@ -557,16 +593,16 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) } space = CIRC_SPACE(ctb->tail, head, ctb->size); - ctb->space = space; + atomic_set(&ctb->space, space); return space >= len_dw; } -static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) +static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) { lockdep_assert_held(&ct->ctbs.send.lock); - if (unlikely(!h2g_has_room(ct, len_dw))) { + if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { if (ct->stall_time == KTIME_MAX) ct->stall_time = ktime_get(); @@ -580,6 +616,12 @@ static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) return 0; } +#define G2H_LEN_DW(f) ({ \ + typeof(f) f_ = (f); \ + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) ? 
\ + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) + \ + GUC_CTB_HXG_MSG_MIN_LEN : 0; \ +}) static int ct_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len, @@ -587,12 +629,13 @@ static int ct_send_nb(struct intel_guc_ct *ct, { struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; unsigned long spin_flags; + u32 g2h_len_dw = G2H_LEN_DW(flags); u32 fence; int ret; spin_lock_irqsave(&ctb->lock, spin_flags); - ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN); + ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN, g2h_len_dw); if (unlikely(ret)) goto out; @@ -601,6 +644,7 @@ static int ct_send_nb(struct intel_guc_ct *ct, if (unlikely(ret)) goto out; + g2h_reserve_space(ct, g2h_len_dw); intel_guc_notify(ct_to_guc(ct)); out: @@ -632,11 +676,13 @@ static int ct_send(struct intel_guc_ct *ct, /* * We use a lazy spin wait loop here as we believe that if the CT * buffers are sized correctly the flow control condition should be - * rare. + * rare. Reserving the maximum size in the G2H credits as we don't know + * how big the response is going to be. */ retry: spin_lock_irqsave(&ctb->lock, flags); - if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN) || + !g2h_has_room(ct, GUC_CTB_HXG_MSG_MAX_LEN))) { if (ct->stall_time == KTIME_MAX) ct->stall_time = ktime_get(); spin_unlock_irqrestore(&ctb->lock, flags); @@ -664,6 +710,7 @@ retry: spin_unlock(&ct->requests.lock); err = ct_write(ct, action, len, fence, 0); + g2h_reserve_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); spin_unlock_irqrestore(&ctb->lock, flags); @@ -673,6 +720,7 @@ retry: intel_guc_notify(ct_to_guc(ct)); err = wait_for_ct_request_update(&request, status); + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); if (unlikely(err)) goto unlink; @@ -992,10 +1040,22 @@ static void ct_incoming_request_worker_func(struct work_struct *w) static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { const u32 *hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); unsigned long flags; GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT); + /* + * Adjusting the space must be done in IRQ or deadlock can occur as the + * CTB processing in the below workqueue can send CTBs which creates a + * circular dependency if the space was returned there. 
+ */ + switch (action) { + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + g2h_release_space(ct, request->size); + } + spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); spin_unlock_irqrestore(&ct->requests.lock, flags); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index edd1bba0445d..dda2d6a75392 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -33,6 +33,7 @@ struct intel_guc; * @desc: pointer to the buffer descriptor * @cmds: pointer to the commands buffer * @size: size of the commands buffer in dwords + * @resv_space: reserved space in buffer in dwords * @head: local shadow copy of head in dwords * @tail: local shadow copy of tail in dwords * @space: local shadow copy of space in dwords @@ -43,9 +44,10 @@ struct intel_guc_ct_buffer { struct guc_ct_buffer_desc *desc; u32 *cmds; u32 size; + u32 resv_space; u32 tail; u32 head; - u32 space; + atomic_t space; bool broken; }; @@ -97,6 +99,13 @@ static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) } #define INTEL_GUC_CT_SEND_NB BIT(31) +#define INTEL_GUC_CT_SEND_G2H_DW_SHIFT 0 +#define INTEL_GUC_CT_SEND_G2H_DW_MASK (0xff << INTEL_GUC_CT_SEND_G2H_DW_SHIFT) +#define MAKE_SEND_FLAGS(len) ({ \ + typeof(len) len_ = (len); \ + GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_)); \ + (FIELD_PREP(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_) | INTEL_GUC_CT_SEND_NB); \ +}) int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size, u32 flags); void intel_guc_ct_event_handler(struct intel_guc_ct *ct); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 3489b390ae77..82534259b7ad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -17,6 +17,10 @@ #include "abi/guc_communication_ctb_abi.h" #include "abi/guc_messages_abi.h" +/* Payload length only i.e. 
don't include G2H header length */ +#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2 +#define G2H_LEN_DW_DEREGISTER_CONTEXT 1 + #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fc0b36ab1e68..cba38f05e44d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -258,6 +258,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) struct intel_context *ce = rq->context; u32 action[3]; int len = 0; + u32 g2h_len_dw = 0; bool enabled = context_enabled(ce); GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); @@ -269,13 +270,13 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) action[len++] = GUC_CONTEXT_ENABLE; set_context_pending_enable(ce); intel_context_get(ce); + g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; } else { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; action[len++] = ce->guc_id; } - err = intel_guc_send_nb(guc, action, len); - + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); if (!enabled && !err) { set_context_enabled(ce); } else if (!enabled) { @@ -734,7 +735,7 @@ static int __guc_action_register_context(struct intel_guc *guc, offset, }; - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); } static int register_context(struct intel_context *ce) @@ -754,7 +755,8 @@ static int __guc_action_deregister_context(struct intel_guc *guc, guc_id, }; - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, true); } static int deregister_context(struct intel_context *ce, u32 guc_id) @@ -899,7 +901,8 @@ static void __guc_context_sched_disable(struct intel_guc *guc, intel_context_get(ce); - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); } static u16 prep_context_pending_disable(struct intel_context *ce) -- cgit v1.2.3 From b97060a99b01b4d706b87df450b69f82962d2fba Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:58 -0700 Subject: drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC When running the GuC the GPU can't be considered idle if the GuC still has contexts pinned. As such, a call has been added in intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for the number of unpinned contexts to go to zero. 
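A simplified model of that idle wait is sketched below; the kernel waitqueue is replaced by a plain polling loop and the counter name is illustrative:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int outstanding_g2h;

    /* Incremented when an H2G that expects a reply is sent,
     * decremented in the G2H handlers once the reply is processed. */
    static void g2h_sent(void)     { atomic_fetch_add(&outstanding_g2h, 1); }
    static void g2h_received(void) { atomic_fetch_sub(&outstanding_g2h, 1); }

    /* Returns 0 once idle, -1 on timeout (stands in for -ETIME). */
    static int wait_for_idle(long timeout_iterations)
    {
            while (atomic_load(&outstanding_g2h)) {
                    if (timeout_iterations-- <= 0)
                            return -1;
            }
            return 0;
    }

    int main(void)
    {
            g2h_sent();
            g2h_received();
            printf("idle wait: %d\n", wait_for_idle(10));
            return 0;
    }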
v2: rtimeout -> remaining_timeout v3: Drop unnecessary includes, guc_submission_busy_loop -> guc_submission_send_busy_loop, drop negatie timeout trick, move a refactor of guc_context_unpin to earlier path (John H) v4: Add stddef.h back into intel_gt_requests.h, sort circuit idle function if not in GuC submission mode Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-16-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c | 19 +++++ drivers/gpu/drm/i915/gt/intel_gt.h | 2 + drivers/gpu/drm/i915/gt/intel_gt_requests.c | 21 ++---- drivers/gpu/drm/i915/gt/intel_gt_requests.h | 9 +-- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 88 +++++++++++++++++++++-- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 ++ drivers/gpu/drm/i915/i915_gem_evict.c | 1 + drivers/gpu/drm/i915/selftests/igt_live_test.c | 2 +- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 +- 13 files changed, 134 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 2f3b7dc7b0e6..5130e8ed9564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -645,7 +645,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + NULL); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e714e21c0a4d..acfdd53b2678 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -585,6 +585,25 @@ static void __intel_gt_disable(struct intel_gt *gt) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); } +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + long remaining_timeout; + + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, + &remaining_timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout ? 
timeout : intel_uc_wait_for_idle(>->uc, + remaining_timeout); +} + int intel_gt_init(struct intel_gt *gt) { int err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index e7aabe0cc5bf..74e771871a9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + void intel_gt_check_and_clear_faults(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 647eca9d867a..edb881d75630 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -130,7 +130,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) GEM_BUG_ON(engine->retire); } -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *remaining_timeout) { struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl, *tn; @@ -195,22 +196,10 @@ out_active: spin_lock(&timelines->lock); if (flush_submission(gt, timeout)) /* Wait, there's more! */ active_count++; - return active_count ? timeout : 0; -} - -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) -{ - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(gt)) - return 0; - - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { - cond_resched(); - if (signal_pending(current)) - return -EINTR; - } + if (remaining_timeout) + *remaining_timeout = timeout; - return timeout; + return active_count ? 
timeout : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index fcc30a6e4fe9..51dbe0e3294e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -6,14 +6,17 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +#include + struct intel_engine_cs; struct intel_gt; struct intel_timeline; -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *remaining_timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) { - intel_gt_retire_requests_timeout(gt, 0); + intel_gt_retire_requests_timeout(gt, 0, NULL); } void intel_engine_init_retire(struct intel_engine_cs *engine); @@ -21,8 +24,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl); void intel_engine_fini_retire(struct intel_engine_cs *engine); -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); - void intel_gt_init_requests(struct intel_gt *gt); void intel_gt_park_requests(struct intel_gt *gt); void intel_gt_unpark_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 451797c62b41..d4987cd789ea 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -39,6 +39,8 @@ struct intel_guc { spinlock_t irq_lock; unsigned int msg_enabled_mask; + atomic_t outstanding_submission_g2h; + struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); @@ -245,6 +247,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) spin_unlock_irq(&guc->irq_lock); } +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); + int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 75f69c28056e..b6bbbdb4c689 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -109,6 +109,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) INIT_LIST_HEAD(&ct->requests.incoming); INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func); + init_waitqueue_head(&ct->wq); } static inline const char *guc_ct_buffer_type_to_str(u32 type) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index dda2d6a75392..2758ee849a59 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "intel_guc_fwif.h" @@ -68,6 +69,9 @@ struct intel_guc_ct { struct tasklet_struct receive_tasklet; + /** @wq: wait queue for g2h chanenl */ + wait_queue_head_t wq; + struct { u16 last_fence; /* last fence used to send request */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cba38f05e44d..e4ce21c9b7ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -252,6 +252,72 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); } +static int guc_submission_send_busy_loop(struct intel_guc 
*guc, + const u32 *action, + u32 len, + u32 g2h_len_dw, + bool loop) +{ + int err; + + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + + if (!err && g2h_len_dw) + atomic_inc(&guc->outstanding_submission_g2h); + + return err; +} + +static int guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout) +{ + const int state = interruptible ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(wait); + + might_sleep(); + GEM_BUG_ON(timeout < 0); + + if (!atomic_read(wait_var)) + return 0; + + if (!timeout) + return -ETIME; + + for (;;) { + prepare_to_wait(&guc->ct.wq, &wait, state); + + if (!atomic_read(wait_var)) + break; + + if (signal_pending_state(state, current)) { + timeout = -EINTR; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + } + finish_wait(&guc->ct.wq, &wait); + + return (timeout < 0) ? timeout : 0; +} + +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) +{ + if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) + return 0; + + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + true, timeout); +} + static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { int err; @@ -278,6 +344,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) err = intel_guc_send_nb(guc, action, len, g2h_len_dw); if (!enabled && !err) { + atomic_inc(&guc->outstanding_submission_g2h); set_context_enabled(ce); } else if (!enabled) { clr_context_pending_enable(ce); @@ -735,7 +802,8 @@ static int __guc_action_register_context(struct intel_guc *guc, offset, }; - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, true); } static int register_context(struct intel_context *ce) @@ -755,8 +823,9 @@ static int __guc_action_deregister_context(struct intel_guc *guc, guc_id, }; - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), - G2H_LEN_DW_DEREGISTER_CONTEXT, true); + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, + true); } static int deregister_context(struct intel_context *ce, u32 guc_id) @@ -901,8 +970,8 @@ static void __guc_context_sched_disable(struct intel_guc *guc, intel_context_get(ce); - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); } static u16 prep_context_pending_disable(struct intel_context *ce) @@ -1444,6 +1513,12 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return ce; } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) @@ -1479,6 +1554,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, lrc_destroy(&ce->ref); } + decr_outstanding_submission_g2h(guc); + return 0; } @@ -1527,6 +1604,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, spin_unlock_irqrestore(&ce->guc_state.lock, flags); } + decr_outstanding_submission_g2h(guc); intel_context_put(ce); return 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 9c954c589edf..c4cef885e984 100644 --- 
a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission); #undef uc_state_checkers #undef __uc_state_checker +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) +{ + return intel_guc_wait_for_idle(&uc->guc, timeout); +} + #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ { \ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4d2d59a9942b..2b73ddb11c66 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -27,6 +27,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index c130010a7033..1c721542e277 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -5,7 +5,7 @@ */ #include "i915_drv.h" -#include "gt/intel_gt_requests.h" +#include "gt/intel_gt.h" #include "../i915_selftest.h" #include "igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d189c4bd4bef..4f8180146888 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -52,7 +52,8 @@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, gt, id) mock_engine_flush(engine); - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, + NULL)); } static void mock_device_release(struct drm_device *dev) -- cgit v1.2.3 From 28ff6520a34d6539064c4b8b5c0f0fded125a779 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:50:59 -0700 Subject: drm/i915/guc: Update GuC debugfs to support new GuC Update GuC debugfs to support the new GuC structures. 
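For reference, the kind of per-context state the new debugfs file reports can be sketched without the seq_file and xarray plumbing; the printed labels follow the patch, everything else is simplified:

    #include <stdio.h>

    struct ctx_info {
            unsigned guc_id;
            unsigned pin_count;
            unsigned guc_id_ref;
            unsigned sched_state;
            unsigned sched_state_no_lock;
    };

    static void print_context_info(const struct ctx_info *ce)
    {
            printf("GuC lrc descriptor %u:\n", ce->guc_id);
            printf("\tContext Pin Count: %u\n", ce->pin_count);
            printf("\tGuC ID Ref Count: %u\n", ce->guc_id_ref);
            printf("\tSchedule State: 0x%x, 0x%x\n\n",
                   ce->sched_state, ce->sched_state_no_lock);
    }

    int main(void)
    {
            struct ctx_info contexts[] = {
                    { .guc_id = 0, .pin_count = 1, .guc_id_ref = 1,
                      .sched_state = 0x0, .sched_state_no_lock = 0x1 },
                    { .guc_id = 1, .pin_count = 0, .guc_id_ref = 0,
                      .sched_state = 0x2, .sched_state_no_lock = 0x0 },
            };

            for (unsigned i = 0; i < sizeof(contexts) / sizeof(contexts[0]); i++)
                    print_context_info(&contexts[i]);
            return 0;
    }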
v2: (John Harrison) - Remove intel_lrc_reg.h include from i915_debugfs.c (Michal) - Rename GuC debugfs functions Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-17-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 22 +++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 3 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 23 +++++++++- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 55 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h | 5 +++ 5 files changed, 107 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index b6bbbdb4c689..8bb6b1bbcea1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1174,3 +1174,25 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) ct_try_receive_message(ct); } + +void intel_guc_ct_print_info(struct intel_guc_ct *ct, + struct drm_printer *p) +{ + drm_printf(p, "CT %s\n", enableddisabled(ct->enabled)); + + if (!ct->enabled) + return; + + drm_printf(p, "H2G Space: %u\n", + atomic_read(&ct->ctbs.send.space) * 4); + drm_printf(p, "Head: %u\n", + ct->ctbs.send.desc->head); + drm_printf(p, "Tail: %u\n", + ct->ctbs.send.desc->tail); + drm_printf(p, "G2H Space: %u\n", + atomic_read(&ct->ctbs.recv.space) * 4); + drm_printf(p, "Head: %u\n", + ct->ctbs.recv.desc->head); + drm_printf(p, "Tail: %u\n", + ct->ctbs.recv.desc->tail); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 2758ee849a59..f709a19c7e21 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -16,6 +16,7 @@ struct i915_vma; struct intel_guc; +struct drm_printer; /** * DOC: Command Transport (CT). @@ -114,4 +115,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, u32 *response_buf, u32 response_buf_size, u32 flags); void intel_guc_ct_event_handler(struct intel_guc_ct *ct); +void intel_guc_ct_print_info(struct intel_guc_ct *ct, struct drm_printer *p); + #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index fe7cb7b29a1e..7a454c91a736 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -9,6 +9,8 @@ #include "intel_guc.h" #include "intel_guc_debugfs.h" #include "intel_guc_log_debugfs.h" +#include "gt/uc/intel_guc_ct.h" +#include "gt/uc/intel_guc_submission.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -22,16 +24,35 @@ static int guc_info_show(struct seq_file *m, void *data) drm_puts(&p, "\n"); intel_guc_log_info(&guc->log, &p); - /* Add more as required ... 
*/ + if (!intel_guc_submission_is_used(guc)) + return 0; + + intel_guc_ct_print_info(&guc->ct, &p); + intel_guc_submission_print_info(guc, &p); return 0; } DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info); +static int guc_registered_contexts_show(struct seq_file *m, void *data) +{ + struct intel_guc *guc = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + intel_guc_submission_print_context_info(guc, &p); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct debugfs_gt_file files[] = { { "guc_info", &guc_info_fops, NULL }, + { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e4ce21c9b7ef..e6e5364beb1c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1609,3 +1609,58 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, return 0; } + +void intel_guc_submission_print_info(struct intel_guc *guc, + struct drm_printer *p) +{ + struct i915_sched_engine *sched_engine = guc->sched_engine; + struct rb_node *rb; + unsigned long flags; + + if (!sched_engine) + return; + + drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", + atomic_read(&guc->outstanding_submission_g2h)); + drm_printf(p, "GuC tasklet count: %u\n\n", + atomic_read(&sched_engine->tasklet.count)); + + spin_lock_irqsave(&sched_engine->lock, flags); + drm_printf(p, "Requests in GuC submit tasklet:\n"); + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *pl = to_priolist(rb); + struct i915_request *rq; + + priolist_for_each_request(rq, pl) + drm_printf(p, "guc_id=%u, seqno=%llu\n", + rq->context->guc_id, + rq->fence.seqno); + } + spin_unlock_irqrestore(&sched_engine->lock, flags); + drm_printf(p, "\n"); +} + +void intel_guc_submission_print_context_info(struct intel_guc *guc, + struct drm_printer *p) +{ + struct intel_context *ce; + unsigned long index; + + xa_for_each(&guc->context_lookup, index, ce) { + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); + drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + drm_printf(p, "\t\tContext Pin Count: %u\n", + atomic_read(&ce->pin_count)); + drm_printf(p, "\t\tGuC ID Ref Count: %u\n", + atomic_read(&ce->guc_id_ref)); + drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", + ce->guc_state.sched_state, + atomic_read(&ce->guc_sched_state_no_lock)); + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 3f7005018939..2b9470c90558 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -10,6 +10,7 @@ #include "intel_guc.h" +struct drm_printer; struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); @@ -20,6 +21,10 @@ void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); int 
intel_guc_submission_setup(struct intel_engine_cs *engine); +void intel_guc_submission_print_info(struct intel_guc *guc, + struct drm_printer *p); +void intel_guc_submission_print_context_info(struct intel_guc *guc, + struct drm_printer *p); static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { -- cgit v1.2.3 From dbf9da8d55efd55c0f8ad448fb997410a33c2c75 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 21 Jul 2021 14:51:00 -0700 Subject: drm/i915/guc: Add trace point for GuC submit Add trace point for GuC submit. Extended existing request trace points to include submit fence value, guc_id, and ring tail value. v2: Fix white space alignment in i915_request_add trace point v3: Delete dep_from, dep_to (Tvrtko) Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-18-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +++ drivers/gpu/drm/i915/i915_trace.h | 23 +++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e6e5364beb1c..d47a8358c831 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -418,6 +418,7 @@ resubmit: guc->stalled_request = last; return false; } + trace_i915_request_guc_submit(last); } guc->stalled_request = NULL; @@ -638,6 +639,8 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc, ret = guc_add_request(guc, rq); if (ret == -EBUSY) guc->stalled_request = rq; + else + trace_i915_request_guc_submit(rq); return ret; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6778ad2a14a4..478f5427531d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -794,30 +794,40 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) __field(u64, ctx) + __field(u32, guc_id) __field(u16, class) __field(u16, instance) __field(u32, seqno) + __field(u32, tail) ), TP_fast_assign( __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; + __entry->guc_id = rq->context->guc_id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; + __entry->tail = rq->tail; ), - TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, guc_id=%u, ctx=%llu, seqno=%u, tail=%u", __entry->dev, __entry->class, __entry->instance, - __entry->ctx, __entry->seqno) + __entry->guc_id, __entry->ctx, __entry->seqno, + __entry->tail) ); DEFINE_EVENT(i915_request, i915_request_add, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_request, i915_request_guc_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) +); + DEFINE_EVENT(i915_request, i915_request_submit, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) @@ -887,6 +897,11 @@ TRACE_EVENT(i915_request_out, #else #if !defined(TRACE_HEADER_MULTI_READ) +static inline void +trace_i915_request_guc_submit(struct i915_request *rq) +{ +} + static inline void trace_i915_request_submit(struct i915_request *rq) { -- cgit v1.2.3 From e03b59064be4665a44963c1034246ab7ca39151a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date:
Wed, 21 Jul 2021 14:51:01 -0700 Subject: drm/i915: Add intel_context tracing Add intel_context tracing. These trace points are particularly helpful when debugging the GuC firmware and can be enabled via the CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS kernel config option. Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-19-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 6 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 14 +++ drivers/gpu/drm/i915/i915_trace.h | 145 ++++++++++++++++++++++ 3 files changed, 165 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index cba2521d26f4..baa05fddd690 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_globals.h" +#include "i915_trace.h" #include "intel_context.h" #include "intel_engine.h" @@ -28,6 +29,7 @@ static void rcu_context_free(struct rcu_head *rcu) { struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); + trace_intel_context_free(ce); kmem_cache_free(global.slab_ce, ce); } @@ -46,6 +48,7 @@ intel_context_create(struct intel_engine_cs *engine) return ERR_PTR(-ENOMEM); intel_context_init(ce, engine); + trace_intel_context_create(ce); return ce; } @@ -268,6 +271,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce, GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ + trace_intel_context_do_pin(ce); + err_unlock: mutex_unlock(&ce->pin_mutex); err_post_unpin: @@ -323,6 +328,7 @@ void __intel_context_do_unpin(struct intel_context *ce, int sub) */ intel_context_get(ce); intel_context_active_release(ce); + trace_intel_context_do_unpin(ce); intel_context_put(ce); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d47a8358c831..26aadad10b12 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -344,6 +344,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) err = intel_guc_send_nb(guc, action, len, g2h_len_dw); if (!enabled && !err) { + trace_intel_context_sched_enable(ce); atomic_inc(&guc->outstanding_submission_g2h); set_context_enabled(ce); } else if (!enabled) { @@ -815,6 +816,8 @@ static int register_context(struct intel_context *ce) u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + ce->guc_id * sizeof(struct guc_lrc_desc); + trace_intel_context_register(ce); + return __guc_action_register_context(guc, ce->guc_id, offset); } @@ -835,6 +838,8 @@ static int deregister_context(struct intel_context *ce, u32 guc_id) { struct intel_guc *guc = ce_to_guc(ce); + trace_intel_context_deregister(ce); + return __guc_action_deregister_context(guc, guc_id); } @@ -908,6 +913,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce) * registering this context.
*/ if (context_registered) { + trace_intel_context_steal_guc_id(ce); set_context_wait_for_deregister_to_register(ce); intel_context_get(ce); @@ -971,6 +977,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + trace_intel_context_sched_disable(ce); intel_context_get(ce); guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), @@ -1133,6 +1140,9 @@ static void __guc_signal_context_fence(struct intel_context *ce) lockdep_assert_held(&ce->guc_state.lock); + if (!list_empty(&ce->guc_state.fences)) + trace_intel_context_fence_release(ce); + list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) i915_sw_fence_complete(&rq->submit); @@ -1538,6 +1548,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, if (unlikely(!ce)) return -EPROTO; + trace_intel_context_deregister_done(ce); + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; @@ -1589,6 +1601,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, return -EPROTO; } + trace_intel_context_sched_done(ce); + if (context_pending_enable(ce)) { clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 478f5427531d..68b70626c3e2 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -895,6 +895,91 @@ TRACE_EVENT(i915_request_out, __entry->ctx, __entry->seqno, __entry->completed) ); +DECLARE_EVENT_CLASS(intel_context, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce), + + TP_STRUCT__entry( + __field(u32, guc_id) + __field(int, pin_count) + __field(u32, sched_state) + __field(u32, guc_sched_state_no_lock) + ), + + TP_fast_assign( + __entry->guc_id = ce->guc_id; + __entry->pin_count = atomic_read(&ce->pin_count); + __entry->sched_state = ce->guc_state.sched_state; + __entry->guc_sched_state_no_lock = + atomic_read(&ce->guc_sched_state_no_lock); + ), + + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x", + __entry->guc_id, __entry->pin_count, + __entry->sched_state, + __entry->guc_sched_state_no_lock) +); + +DEFINE_EVENT(intel_context, intel_context_register, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_deregister, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_deregister_done, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_enable, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_disable, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_done, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_create, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_fence_release, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_free, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_steal_guc_id, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_do_pin, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_do_unpin, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + 
#else #if !defined(TRACE_HEADER_MULTI_READ) static inline void @@ -921,6 +1006,66 @@ static inline void trace_i915_request_out(struct i915_request *rq) { } + +static inline void +trace_intel_context_register(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_deregister(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_deregister_done(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_enable(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_disable(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_done(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_create(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_fence_release(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_free(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_steal_guc_id(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_do_pin(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_do_unpin(struct intel_context *ce) +{ +} #endif #endif -- cgit v1.2.3 From 7894375e27039ab0c4da147ee294209f411c9e28 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 20 Jul 2021 16:20:11 -0700 Subject: drm/i915/gt: fix platform prefix gen8_clear_engine_error_register() is actually not used by GRAPHICS_VER >= 8, since for those we are using another register that is not engine-dependent. Fix the platform prefix, to make clear we are not using any GEN6_RING_FAULT_REG_* on GRAPHICS_VER >= 8. Signed-off-by: Lucas De Marchi Reviewed-by: Tvrtko Ursulin Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210720232014.3302645-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index acfdd53b2678..46441607d18b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -205,7 +205,7 @@ static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) intel_uncore_rmw(uncore, reg, 0, 0); } -static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) +static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) { GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); GEN6_RING_FAULT_REG_POSTING_READ(engine); @@ -251,7 +251,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, enum intel_engine_id id; for_each_engine_masked(engine, gt, engine_mask, id) - gen8_clear_engine_error_register(engine); + gen6_clear_engine_error_register(engine); } } -- cgit v1.2.3 From f9be30003fb372387565dcb9789b286f51e6e808 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 20 Jul 2021 16:20:12 -0700 Subject: drm/i915/gt: nuke unused legacy engine hw_id The engine hw_id is only used by RING_FAULT_REG(), which is not used by GRAPHICS_VER >= 8. We did use hw_id on recent platforms to set the engine's guc_id, but that is not the case anymore since commit c784e5249e77 ("drm/i915/guc: Update to use firmware v49.0.1"): now we only use class and id information to generate guc_id. We tend to keep adding new defines just to be consistent, but let's try to remove them and let the field default to 0 for engines that only exist on gen8+ platforms.
v2: Reword commit message and add information about when we stopped using hw_id (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Tvrtko Ursulin Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210720232014.3302645-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ---- drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c1c96ced2a4b..f720a864142b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -80,7 +80,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS1] = { - .hw_id = VCS1_HW, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -89,7 +88,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS2] = { - .hw_id = VCS2_HW, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { @@ -97,7 +95,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS3] = { - .hw_id = VCS3_HW, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { @@ -114,7 +111,6 @@ static const struct engine_info intel_engines[] = { }, }, [VECS1] = { - .hw_id = VECS1_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 1cb9c3b70b29..a107eb58ffa2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -34,10 +34,6 @@ #define VCS0_HW 1 #define BCS0_HW 2 #define VECS0_HW 3 -#define VCS1_HW 4 -#define VCS2_HW 6 -#define VCS3_HW 7 -#define VECS1_HW 12 /* Gen11+ HW Engine class + instance */ #define RENDER_CLASS 0 -- cgit v1.2.3 From 265b5ee0d32bbb3439bfcce8a7b60ec2f4c0acc5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 20 Jul 2021 16:20:13 -0700 Subject: drm/i915/gt: rename legacy engine->hw_id to engine->gen6_hw_id We kept adding new engines and for that increasing hw_id unnecessarily: it's not used since GRAPHICS_VER == 8. Prepend "gen6" to the field and try to pack it in the structs to give a hint this field is actually not used in recent platforms. 
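The "nuke unused legacy engine hw_id" message above notes that guc_id is now generated purely from class and instance information; a minimal user-space sketch of that kind of derivation follows. The helper name and the shift width here are illustrative assumptions only: the real flow goes through engine_class_to_guc_class() and MAKE_GUC_ID() during engine setup, and their exact field layout lives in the GuC headers and is not reproduced in this series.

#include <stdint.h>
#include <stdio.h>

/* Engine class numbering as used by the driver (intel_engine_types.h). */
#define RENDER_CLASS            0
#define VIDEO_DECODE_CLASS      1
#define VIDEO_ENHANCEMENT_CLASS 2
#define COPY_ENGINE_CLASS       3

/* Illustrative packing only; the real MAKE_GUC_ID() layout may differ. */
#define EXAMPLE_GUC_ID_CLASS_SHIFT 8
#define example_make_guc_id(class, instance) \
	(((uint32_t)(class) << EXAMPLE_GUC_ID_CLASS_SHIFT) | (uint32_t)(instance))

int main(void)
{
	/* e.g. VCS5 is video-decode class, instance 5 */
	printf("example guc_id for VCS5: 0x%x\n",
	       (unsigned int)example_make_guc_id(VIDEO_DECODE_CLASS, 5));
	return 0;
}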
Signed-off-by: Lucas De Marchi Reviewed-by: Tvrtko Ursulin Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210720232014.3302645-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 12 ++++++------ drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +- drivers/gpu/drm/i915/i915_reg.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f720a864142b..9c8cba1c6cd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -42,7 +42,7 @@ #define MAX_MMIO_BASES 3 struct engine_info { - unsigned int hw_id; + u8 gen6_hw_id; u8 class; u8 instance; /* mmio bases table *must* be sorted in reverse graphics_ver order */ @@ -54,7 +54,7 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS0] = { - .hw_id = RCS0_HW, + .gen6_hw_id = RCS0_HW, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { @@ -62,7 +62,7 @@ static const struct engine_info intel_engines[] = { }, }, [BCS0] = { - .hw_id = BCS0_HW, + .gen6_hw_id = BCS0_HW, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { @@ -70,7 +70,7 @@ static const struct engine_info intel_engines[] = { }, }, [VCS0] = { - .hw_id = VCS0_HW, + .gen6_hw_id = VCS0_HW, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -102,7 +102,7 @@ static const struct engine_info intel_engines[] = { }, }, [VECS0] = { - .hw_id = VECS0_HW, + .gen6_hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -290,7 +290,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; - engine->hw_id = info->hw_id; + engine->gen6_hw_id = info->gen6_hw_id; guc_class = engine_class_to_guc_class(info->class); engine->guc_id = MAKE_GUC_ID(guc_class, info->instance); engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a107eb58ffa2..266422d8d1b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -264,11 +264,11 @@ struct intel_engine_cs { enum intel_engine_id id; enum intel_engine_id legacy_idx; - unsigned int hw_id; unsigned int guc_id; intel_engine_mask_t mask; + u8 gen6_hw_id; u8 class; u8 instance; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1022400dd1bd..6a2ae109f611 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2572,7 +2572,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) #define ARB_MODE_SWIZZLE_BDW (1 << 1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100 * (engine)->hw_id) +#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100 * (engine)->gen6_hw_id) #define GEN8_RING_FAULT_REG _MMIO(0x4094) #define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) -- cgit v1.2.3 From eea97e42f48bff0706b620730799b5057c9caf90 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 21 Jul 2021 15:30:30 -0700 Subject: drm/i915/xehp: VDBOX/VEBOX fusing registers are enable-based On Xe_HP the fusing register is renamed and changed to have the "enable" semantics, but otherwise remains compatible (mmio address, bitmask ranges) with 
older platforms. To simplify things we do not add a new register definition but just stop inverting the fusing masks before processing them. Bspec: 52615 Cc: Daniele Ceraolo Spurio Signed-off-by: Tvrtko Ursulin Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210721223043.834562-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 9c8cba1c6cd1..4168b9fc59e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -486,7 +486,14 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) if (GRAPHICS_VER(i915) < 11) return info->engine_mask; - media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + /* + * On newer platforms the fusing register is called 'enable' and has + * enable semantics, while on older platforms it is called 'disable' + * and bits have disable semantics. + */ + media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + media_fuse = ~media_fuse; vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> -- cgit v1.2.3 From 81340cf3bddded4fe23a55148152e6d5e2460351 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:36 +0100 Subject: drm/i915/uapi: reject set_domain for discrete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPU domain should be static for discrete, and on DG1 we don't need any flushing since everything is already coherent, so really all this does is an object wait, for which we have an ioctl. Longer term the desired caching should be an immutable creation time property for the BO, which can be set with something like gem_create_ext. One other user is iris + userptr, which uses the set_domain to probe all the pages to check if the GUP succeeds, however we now have a PROBE flag for this purpose. v2: add some more kernel doc, also add the implicit rules with caching Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Acked-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-5-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ include/uapi/drm/i915_drm.h | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 43004bef55cb..b684a62bf3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -490,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, u32 write_domain = args->write_domain; int err; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + /* Only handle setting domains to types used by the CPU.
*/ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0aea82657cdc..975087553ea0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -923,6 +923,25 @@ struct drm_i915_gem_mmap_offset { * - I915_GEM_DOMAIN_GTT: Mappable aperture domain * * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. */ struct drm_i915_gem_set_domain { /** @handle: Handle for the object. */ -- cgit v1.2.3 From 0b03d93fde21d030faf9c4b6c888ad80cfcd0d4b Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 22 Jul 2021 12:20:41 -0700 Subject: drm/i915: Extend Wa_1406941453 to adl-p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workaround also needed for alderlake-P. HSDES: 14010801662 Cc: Matt Roper Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210722192041.92346-1-jose.souza@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 247f0331ebee..b84d1d816898 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1583,8 +1583,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || - IS_ALDERLAKE_S(i915)) { - /* Wa_1406941453:tgl,rkl,dg1,adl-s */ + IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { + /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN10_SAMPLER_MODE, ENABLE_SMALLPL); -- cgit v1.2.3 From d8905ba705ab526a0979541e39e971173c31de1b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 13 Jul 2021 20:15:00 -0700 Subject: drm/i915/xehp: Define multicast register ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we can't steer multicast register reads during ring-based workaround verification, we need to define the multicast ranges where failure to steer could potentially cause us to read back from a fused-off register instance. As with gen12, we can ignore the multicast ranges that the bspec describes as 'SQIDI' since all instances of those registers will always be present and we'll always be able to read back a workaround value that was written with multicast. 
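Conceptually, the workaround verification code only needs to ask whether a register offset falls inside one of these per-platform multicast ranges and, if so, skip the readback check. A simplified standalone sketch of that test follows; the function name and the trimmed table are illustrative, and the driver's real mcr_range() helper in intel_workarounds.c walks the full per-platform table added in the diff below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the driver's struct mcr_range { u32 start; u32 end; }. */
struct mcr_range {
	uint32_t start;
	uint32_t end;
};

/* A few of the Xe_HP ranges from the patch; a zeroed sentinel ends the table. */
static const struct mcr_range mcr_ranges_xehp_example[] = {
	{ .start = 0x4000,  .end = 0x4aff },
	{ .start = 0x5200,  .end = 0x52ff },
	{ .start = 0xb000,  .end = 0xb3ff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

/* Returns true if the offset lies in a multicast (steered) range. */
static bool offset_in_mcr_range(const struct mcr_range *ranges, uint32_t offset)
{
	int i;

	for (i = 0; ranges[i].end; i++)
		if (offset >= ranges[i].start && offset <= ranges[i].end)
			return true;

	return false;
}

int main(void)
{
	printf("0x24a10 multicast? %d\n", offset_in_mcr_range(mcr_ranges_xehp_example, 0x24a10));
	printf("0x03000 multicast? %d\n", offset_in_mcr_range(mcr_ranges_xehp_example, 0x3000));
	return 0;
}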
Bspec: 66534 Cc: José Roberto de Souza Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210714031540.3539704-11-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b84d1d816898..3aa5ce3cda8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1984,12 +1984,30 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; +static const struct mcr_range mcr_ranges_xehp[] = { + { .start = 0x4000, .end = 0x4aff }, + { .start = 0x5200, .end = 0x52ff }, + { .start = 0x5400, .end = 0x7fff }, + { .start = 0x8140, .end = 0x815f }, + { .start = 0x8c80, .end = 0x8dff }, + { .start = 0x94d0, .end = 0x955f }, + { .start = 0x9680, .end = 0x96ff }, + { .start = 0xb000, .end = 0xb3ff }, + { .start = 0xc800, .end = 0xcfff }, + { .start = 0xd800, .end = 0xd8ff }, + { .start = 0xdc00, .end = 0xffff }, + { .start = 0x17000, .end = 0x17fff }, + { .start = 0x24a00, .end = 0x24a7f }, +}; + static bool mcr_range(struct drm_i915_private *i915, u32 offset) { const struct mcr_range *mcr_ranges; int i; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + mcr_ranges = mcr_ranges_xehp; + else if (GRAPHICS_VER(i915) >= 12) mcr_ranges = mcr_ranges_gen12; else if (GRAPHICS_VER(i915) >= 8) mcr_ranges = mcr_ranges_gen8; -- cgit v1.2.3 From 938c778f6a22fa1251fe48f175006404f18fb8f3 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 23 Jul 2021 12:10:24 -0700 Subject: drm/i915/xehp: Extra media engines - Part 1 (engine definitions) Xe_HP can have a lot of extra media engines. This patch adds the basic definitions for them. v2: - Re-order intel_gt_info and intel_device_info slightly to avoid unnecessary padding now that we've increased the size of intel_engine_mask_t. (Tvrtko) v3: - Drop the .hw_id assignments. (Lucas) v4: - Fix graphics_ver typo for VCS4 (should be 12, not 11). 
(Lucas) Cc: Tvrtko Ursulin Cc: Lucas De Marchi Signed-off-by: John Harrison Signed-off-by: Tomas Winkler Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210723191024.1553405-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 7 ++--- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 44 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 14 ++++++--- drivers/gpu/drm/i915/gt/intel_gt_types.h | 5 ++-- drivers/gpu/drm/i915/i915_pci.c | 5 ++-- drivers/gpu/drm/i915/i915_reg.h | 6 ++++ drivers/gpu/drm/i915/intel_device_info.h | 3 +- 7 files changed, 71 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index b29eb9fd0009..461844dffd7e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -279,7 +279,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) if (mode & EMIT_INVALIDATE) aux_inv = rq->engine->mask & ~BIT(BCS0); if (aux_inv) - cmd += 2 * hweight8(aux_inv) + 2; + cmd += 2 * hweight32(aux_inv) + 2; cs = intel_ring_begin(rq, cmd); if (IS_ERR(cs)) @@ -313,9 +313,8 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) struct intel_engine_cs *engine; unsigned int tmp; - *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); - for_each_engine_masked(engine, rq->engine->gt, - aux_inv, tmp) { + *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv)); + for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) { *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); *cs++ = AUX_INV; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4168b9fc59e1..67c61908bc82 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -101,6 +101,34 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } }, }, + [VCS4] = { + .class = VIDEO_DECODE_CLASS, + .instance = 4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } + }, + }, + [VCS5] = { + .class = VIDEO_DECODE_CLASS, + .instance = 5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } + }, + }, + [VCS6] = { + .class = VIDEO_DECODE_CLASS, + .instance = 6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } + }, + }, + [VCS7] = { + .class = VIDEO_DECODE_CLASS, + .instance = 7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } + }, + }, [VECS0] = { .gen6_hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, @@ -117,6 +145,20 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } }, }, + [VECS2] = { + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } + }, + }, + [VECS3] = { + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } + }, + }, }; /** @@ -265,6 +307,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1)); + BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1)); if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) return -EINVAL; diff --git 
a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 266422d8d1b1..8f1f2f12d6f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -42,7 +42,7 @@ #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 -#define MAX_ENGINE_INSTANCE 3 +#define MAX_ENGINE_INSTANCE 7 #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -60,7 +60,7 @@ struct intel_gt; struct intel_ring; struct intel_uncore; -typedef u8 intel_engine_mask_t; +typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) struct intel_hw_status_page { @@ -97,8 +97,8 @@ struct i915_ctx_workarounds { struct i915_vma *vma; }; -#define I915_MAX_VCS 4 -#define I915_MAX_VECS 2 +#define I915_MAX_VCS 8 +#define I915_MAX_VECS 4 /* * Engine IDs definitions. @@ -111,9 +111,15 @@ enum intel_engine_id { VCS1, VCS2, VCS3, + VCS4, + VCS5, + VCS6, + VCS7, #define _VCS(n) (VCS0 + (n)) VECS0, VECS1, + VECS2, + VECS3, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES #define INVALID_ENGINE ((enum intel_engine_id)-1) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index d93d578a4105..97a5075288d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -174,13 +174,14 @@ struct intel_gt { struct intel_gt_info { intel_engine_mask_t engine_mask; + + u32 l3bank_mask; + u8 num_engines; /* Media engine access to SFC per instance */ u8 vdbox_sfc_access; - u32 l3bank_mask; - /* Slice/subslice/EU info */ struct sseu_dev_info sseu; } info; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 19f665b8ffc3..94b5418a86c7 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1001,8 +1001,9 @@ static const struct intel_device_info xehpsdv_info = { .pipe_mask = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | - BIT(VECS0) | BIT(VECS1) | - BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3), + BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | + BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) | + BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7), .require_force_probe = 1, }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6a2ae109f611..42accdfa860f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2516,9 +2516,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_BSD2_RING_BASE 0x1c4000 #define GEN11_BSD3_RING_BASE 0x1d0000 #define GEN11_BSD4_RING_BASE 0x1d4000 +#define XEHP_BSD5_RING_BASE 0x1e0000 +#define XEHP_BSD6_RING_BASE 0x1e4000 +#define XEHP_BSD7_RING_BASE 0x1f0000 +#define XEHP_BSD8_RING_BASE 0x1f4000 #define VEBOX_RING_BASE 0x1a000 #define GEN11_VEBOX_RING_BASE 0x1c8000 #define GEN11_VEBOX2_RING_BASE 0x1d8000 +#define XEHP_VEBOX3_RING_BASE 0x1e8000 +#define XEHP_VEBOX4_RING_BASE 0x1f8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base) + 0x30) #define RING_HEAD(base) _MMIO((base) + 0x34) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 4447b121c8c2..50ac43d4047f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -172,7 +172,6 @@ struct intel_device_info { u8 media_ver; u8 media_rel; - u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ enum intel_platform platform; @@ -188,6 +187,8 
@@ struct intel_device_info { u32 display_mmio_offset; + u8 gt; /* GT number, 0 if undefined */ + u8 pipe_mask; u8 cpu_transcoder_mask; -- cgit v1.2.3 From 1b16b6b696728ffb5bd1eb97bd13b6c304ccd980 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 23 Jul 2021 10:42:12 -0700 Subject: drm/i915/xehp: Extra media engines - Part 2 (interrupts) Xe_HP can have a lot of extra media engines. This patch adds the interrupt handler support for them. Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: John Harrison Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210723174239.1551352-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 13 ++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index c13462274fe8..b2de83be4d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -184,7 +184,13 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); @@ -218,8 +224,13 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); - + if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 42accdfa860f..0b2eaf7594aa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8079,7 +8079,10 @@ enum { #define GEN11_BCS_RSVD_INTR_MASK _MMIO(0x1900a0) #define GEN11_VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) #define GEN11_VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) +#define GEN12_VCS4_VCS5_INTR_MASK _MMIO(0x1900b0) +#define GEN12_VCS6_VCS7_INTR_MASK _MMIO(0x1900b4) #define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) +#define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4) #define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) #define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) -- cgit v1.2.3 From ddabf72176af198e450257249d946a8b317a1ac5 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 23 Jul 2021 10:42:13 -0700 Subject: drm/i915/xehp: Extra media engines - Part 3 (reset) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe_HP can have a lot of extra media engines. This patch adds the reset support for them. Signed-off-by: John Harrison Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210723174239.1551352-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 6 ++++++ drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 72251638d4ea..9586613ee399 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -515,8 +515,14 @@ static int gen11_reset_engines(struct intel_gt *gt, [VCS1] = GEN11_GRDOM_MEDIA2, [VCS2] = GEN11_GRDOM_MEDIA3, [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, [VECS0] = GEN11_GRDOM_VECS, [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, }; struct intel_engine_cs *engine; intel_engine_mask_t tmp; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0b2eaf7594aa..be945dc74d6f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -395,10 +395,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GRDOM_MEDIA2 (1 << 6) #define GEN11_GRDOM_MEDIA3 (1 << 7) #define GEN11_GRDOM_MEDIA4 (1 << 8) +#define GEN11_GRDOM_MEDIA5 (1 << 9) +#define GEN11_GRDOM_MEDIA6 (1 << 10) +#define GEN11_GRDOM_MEDIA7 (1 << 11) +#define GEN11_GRDOM_MEDIA8 (1 << 12) #define GEN11_GRDOM_VECS (1 << 13) #define GEN11_GRDOM_VECS2 (1 << 14) +#define GEN11_GRDOM_VECS3 (1 << 15) +#define GEN11_GRDOM_VECS4 (1 << 16) #define GEN11_GRDOM_SFC0 (1 << 17) #define GEN11_GRDOM_SFC1 (1 << 18) +#define GEN11_GRDOM_SFC2 (1 << 19) +#define GEN11_GRDOM_SFC3 (1 << 20) #define GEN11_VCS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << ((instance) >> 1)) #define GEN11_VECS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << (instance)) -- cgit v1.2.3 From bfac1e2b6e2d6b3fdb3d8f997187d6f1c63ea965 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 23 Jul 2021 10:42:14 -0700 Subject: drm/i915/xehp: Xe_HP forcewake support Implement Xe_HP forcewake handling. While we're at it, let's reorder the forcewake assignment if/else ladder to match our usual driver conventions.
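The write path touched here has two steps: a shadowed register (RING_TAIL, RING_EXECLIST_CONTROL, ...) needs no forcewake for writes, and any other offset is mapped to a forcewake domain mask through the sorted per-platform range table. A much simplified standalone sketch of that flow follows; the helper names are illustrative, the two ranges are copied from the __xehp_fw_ranges table added further down, and the real driver does the shadow lookup with a binary search (BSEARCH) over xehp_shadowed_regs rather than the toy check used here.

#include <stdint.h>
#include <stdio.h>

#define FORCEWAKE_RENDER (1u << 0)
#define FORCEWAKE_GT     (1u << 1)

/* Simplified mirror of the driver's forcewake range table entries. */
struct fw_range {
	uint32_t start;
	uint32_t end;
	uint32_t domains;	/* bitmask of forcewake domains */
};

/* Two ranges taken from the Xe_HP table in this patch, for illustration. */
static const struct fw_range xehp_fw_ranges_example[] = {
	{ 0x2000, 0x26ff, FORCEWAKE_RENDER },
	{ 0x2700, 0x4aff, FORCEWAKE_GT },
};

/* Toy stand-in for is_xehp_shadowed(): 0x2550 is RING_EXECLIST_CONTROL of the render ring. */
static int is_shadowed_example(uint32_t offset)
{
	return offset == 0x2550;
}

/* Shadowed registers need no forcewake for writes; everything else is looked up. */
static uint32_t write_fw_domains_example(uint32_t offset)
{
	unsigned int i;

	if (is_shadowed_example(offset))
		return 0;

	for (i = 0; i < sizeof(xehp_fw_ranges_example) / sizeof(xehp_fw_ranges_example[0]); i++)
		if (offset >= xehp_fw_ranges_example[i].start &&
		    offset <= xehp_fw_ranges_example[i].end)
			return xehp_fw_ranges_example[i].domains;

	return 0;
}

int main(void)
{
	printf("domains for 0x2550: 0x%x (shadowed)\n", (unsigned int)write_fw_domains_example(0x2550));
	printf("domains for 0x2600: 0x%x\n", (unsigned int)write_fw_domains_example(0x2600));
	return 0;
}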
Co-authored-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210723174239.1551352-6-matthew.d.roper@intel.com --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 4 + drivers/gpu/drm/i915/intel_uncore.c | 336 +++++++++++++++++---- drivers/gpu/drm/i915/intel_uncore.h | 14 +- drivers/gpu/drm/i915/selftests/intel_uncore.c | 2 + 4 files changed, 302 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 87cedaeb4bf8..be99a74e6e09 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3357,6 +3357,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); execlists->ctrl_reg = uncore->regs + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); + + engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore, + RING_EXECLIST_CONTROL(engine->mmio_base), + FW_REG_WRITE); } else { execlists->submit_reg = uncore->regs + i915_mmio_reg_offset(RING_ELSP(base)); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d067524f9162..676b0052f01e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -24,6 +24,8 @@ #include #include +#include "gt/intel_lrc_reg.h" /* for shadow reg list */ + #include "i915_drv.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -68,8 +70,14 @@ static const char * const forcewake_domain_names[] = { "vdbox1", "vdbox2", "vdbox3", + "vdbox4", + "vdbox5", + "vdbox6", + "vdbox7", "vebox0", "vebox1", + "vebox2", + "vebox3", }; const char * @@ -952,30 +960,80 @@ static const i915_reg_t gen8_shadowed_regs[] = { }; static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ /* TODO: Other registers are not yet used */ }; 
static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ + /* TODO: Other registers are not yet used */ +}; + +static const i915_reg_t xehp_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ + RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ + RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ + RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ + RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ + RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ + RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ + RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ + RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ /* TODO: Other registers are not yet used */ }; @@ -991,17 +1049,18 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) return 0; } -#define __is_genX_shadowed(x) \ -static bool 
is_gen##x##_shadowed(u32 offset) \ +#define __is_X_shadowed(x) \ +static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = gen##x##_shadowed_regs; \ - return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \ + const i915_reg_t *regs = x##_shadowed_regs; \ + return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ mmio_reg_cmp); \ } -__is_genX_shadowed(8) -__is_genX_shadowed(11) -__is_genX_shadowed(12) +__is_X_shadowed(gen8) +__is_X_shadowed(gen11) +__is_X_shadowed(gen12) +__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1065,6 +1124,15 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) +#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if (!is_xehp_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1249,6 +1317,145 @@ static const struct intel_forcewake_range __gen12_fw_ranges[] = { 0x1d3f00 - 0x1d3fff: VD2 */ }; +/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ +static const struct intel_forcewake_range __xehp_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0x1fff, 0), /* + 0x0 - 0xaff: reserved + 0xb00 - 0x1fff: always on */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x4aff, FORCEWAKE_GT), + GEN_FW_RANGE(0x4b00, 0x51ff, 0), /* + 0x4b00 - 0x4fff: reserved + 0x5000 - 0x51ff: always on */ + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x81ff, 0), /* + 0x8160 - 0x817f: reserved + 0x8180 - 0x81ff: always on */ + GEN_FW_RANGE(0x8200, 0x82ff, FORCEWAKE_GT), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x94cf, FORCEWAKE_GT), /* + 0x8500 - 0x87ff: gt + 0x8800 - 0x8fff: reserved + 0x9000 - 0x947f: gt + 0x9480 - 0x94cf: reserved */ + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x9560, 0x97ff, 0), /* + 0x9560 - 0x95ff: always on + 0x9600 - 0x97ff: reserved */ + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* + 0x9800 - 0xb4ff: gt + 0xb500 - 0xbfff: reserved + 0xc000 - 0xcfff: gt */ + GEN_FW_RANGE(0xd000, 0xd7ff, 0), + GEN_FW_RANGE(0xd800, 0xdbff, FORCEWAKE_GT), + GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* + 0xdd00 - 0xddff: gt + 0xde00 - 0xde7f: reserved */ + GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* + 0xde80 - 0xdfff: render + 0xe000 - 0xe0ff: reserved + 0xe100 - 0xe8ff: render */ + GEN_FW_RANGE(0xe900, 0xffff, FORCEWAKE_GT), /* + 0xe900 - 0xe9ff: gt + 0xea00 - 0xefff: reserved + 0xf000 - 0xffff: gt */ + GEN_FW_RANGE(0x10000, 0x13fff, 0), /* + 0x10000 - 0x11fff: reserved + 0x12000 - 0x127ff: always on + 0x12800 - 0x13fff: reserved */ + GEN_FW_RANGE(0x14000, 0x141ff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x14200, 0x143ff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x14400, 0x145ff, FORCEWAKE_MEDIA_VDBOX4), + GEN_FW_RANGE(0x14600, 0x147ff, FORCEWAKE_MEDIA_VDBOX6), + GEN_FW_RANGE(0x14800, 0x1ffff, FORCEWAKE_RENDER), /* + 0x14800 - 0x14fff: render + 0x15000 - 0x16dff: reserved + 0x16e00 - 0x1ffff: render */ + GEN_FW_RANGE(0x20000, 0x21fff, FORCEWAKE_MEDIA_VDBOX0), /* + 0x20000 - 0x20fff: VD0 + 0x21000 - 
0x21fff: reserved */ + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), + GEN_FW_RANGE(0x24000, 0x2417f, 0), /* + 0x24000 - 0x2407f: always on + 0x24080 - 0x2417f: reserved */ + GEN_FW_RANGE(0x24180, 0x249ff, FORCEWAKE_GT), /* + 0x24180 - 0x241ff: gt + 0x24200 - 0x249ff: reserved */ + GEN_FW_RANGE(0x24a00, 0x251ff, FORCEWAKE_RENDER), /* + 0x24a00 - 0x24a7f: render + 0x24a80 - 0x251ff: reserved */ + GEN_FW_RANGE(0x25200, 0x25fff, FORCEWAKE_GT), /* + 0x25200 - 0x252ff: gt + 0x25300 - 0x25fff: reserved */ + GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /* + 0x26000 - 0x27fff: render + 0x28000 - 0x29fff: reserved + 0x2a000 - 0x2ffff: undocumented */ + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* + 0x1c0000 - 0x1c2bff: VD0 + 0x1c2c00 - 0x1c2cff: reserved + 0x1c2d00 - 0x1c2dff: VD0 + 0x1c2e00 - 0x1c3eff: reserved + 0x1c3f00 - 0x1c3fff: VD0 */ + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), /* + 0x1c4000 - 0x1c6bff: VD1 + 0x1c6c00 - 0x1c6cff: reserved + 0x1c6d00 - 0x1c6dff: VD1 + 0x1c6e00 - 0x1c7fff: reserved */ + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /* + 0x1c8000 - 0x1ca0ff: VE0 + 0x1ca100 - 0x1cbfff: reserved */ + GEN_FW_RANGE(0x1cc000, 0x1ccfff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1cd000, 0x1cdfff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1ce000, 0x1cefff, FORCEWAKE_MEDIA_VDBOX4), + GEN_FW_RANGE(0x1cf000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX6), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), /* + 0x1d0000 - 0x1d2bff: VD2 + 0x1d2c00 - 0x1d2cff: reserved + 0x1d2d00 - 0x1d2dff: VD2 + 0x1d2e00 - 0x1d3eff: reserved + 0x1d3f00 - 0x1d3fff: VD2 */ + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), /* + 0x1d4000 - 0x1d6bff: VD3 + 0x1d6c00 - 0x1d6cff: reserved + 0x1d6d00 - 0x1d6dff: VD3 + 0x1d6e00 - 0x1d7fff: reserved */ + GEN_FW_RANGE(0x1d8000, 0x1dffff, FORCEWAKE_MEDIA_VEBOX1), /* + 0x1d8000 - 0x1da0ff: VE1 + 0x1da100 - 0x1dffff: reserved */ + GEN_FW_RANGE(0x1e0000, 0x1e3fff, FORCEWAKE_MEDIA_VDBOX4), /* + 0x1e0000 - 0x1e2bff: VD4 + 0x1e2c00 - 0x1e2cff: reserved + 0x1e2d00 - 0x1e2dff: VD4 + 0x1e2e00 - 0x1e3eff: reserved + 0x1e3f00 - 0x1e3fff: VD4 */ + GEN_FW_RANGE(0x1e4000, 0x1e7fff, FORCEWAKE_MEDIA_VDBOX5), /* + 0x1e4000 - 0x1e6bff: VD5 + 0x1e6c00 - 0x1e6cff: reserved + 0x1e6d00 - 0x1e6dff: VD5 + 0x1e6e00 - 0x1e7fff: reserved */ + GEN_FW_RANGE(0x1e8000, 0x1effff, FORCEWAKE_MEDIA_VEBOX2), /* + 0x1e8000 - 0x1ea0ff: VE2 + 0x1ea100 - 0x1effff: reserved */ + GEN_FW_RANGE(0x1f0000, 0x1f3fff, FORCEWAKE_MEDIA_VDBOX6), /* + 0x1f0000 - 0x1f2bff: VD6 + 0x1f2c00 - 0x1f2cff: reserved + 0x1f2d00 - 0x1f2dff: VD6 + 0x1f2e00 - 0x1f3eff: reserved + 0x1f3f00 - 0x1f3fff: VD6 */ + GEN_FW_RANGE(0x1f4000, 0x1f7fff, FORCEWAKE_MEDIA_VDBOX7), /* + 0x1f4000 - 0x1f6bff: VD7 + 0x1f6c00 - 0x1f6cff: reserved + 0x1f6d00 - 0x1f6dff: VD7 + 0x1f6e00 - 0x1f7fff: reserved */ + GEN_FW_RANGE(0x1f8000, 0x1fa0ff, FORCEWAKE_MEDIA_VEBOX3), +}; + static void ilk_dummy_write(struct intel_uncore *uncore) { @@ -1502,6 +1709,7 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) +__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -1582,8 +1790,14 @@ static int __fw_domain_init(struct intel_uncore *uncore, BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2)); 
BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX4 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX4)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX5 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX5)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX6 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX6)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX7 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX7)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX2)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX3)); d->mask = BIT(domain_id); @@ -1870,36 +2084,36 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) return ret; forcewake_early_sanitize(uncore, 0); - if (IS_GRAPHICS_VER(i915, 6, 7)) { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); - - if (IS_VALLEYVIEW(i915)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); - } - } else if (GRAPHICS_VER(i915) == 8) { - if (IS_CHERRYVIEW(i915)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); - } - } else if (IS_GRAPHICS_VER(i915, 9, 10)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else if (GRAPHICS_VER(i915) == 11) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); - } else { + } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + } else if (GRAPHICS_VER(i915) == 11) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else if (IS_GRAPHICS_VER(i915, 9, 10)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (IS_CHERRYVIEW(i915)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (GRAPHICS_VER(i915) == 8) { + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); + } else if (IS_VALLEYVIEW(i915)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (IS_GRAPHICS_VER(i915, 6, 7)) { + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; @@ -1988,6 +2202,22 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, if (HAS_ENGINE(gt, _VCS(i))) continue; + /* + * Starting with XeHP, the power well for an even-numbered + * VDBOX is also used for shared units within the + * media slice such as SFC. 
So even if the engine + * itself is fused off, we still need to initialize + * the forcewake domain if any of the other engines + * in the same media slice are present. + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50) && i % 2 == 0) { + if ((i + 1 < I915_MAX_VCS) && HAS_ENGINE(gt, _VCS(i + 1))) + continue; + + if (HAS_ENGINE(gt, _VECS(i / 2))) + continue; + } + if (fw_domains & BIT(domain_id)) fw_domain_fini(uncore, domain_id); } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index a18bdb57af7b..3c0b0a8b5250 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -52,8 +52,14 @@ enum forcewake_domain_id { FW_DOMAIN_ID_MEDIA_VDBOX1, FW_DOMAIN_ID_MEDIA_VDBOX2, FW_DOMAIN_ID_MEDIA_VDBOX3, + FW_DOMAIN_ID_MEDIA_VDBOX4, + FW_DOMAIN_ID_MEDIA_VDBOX5, + FW_DOMAIN_ID_MEDIA_VDBOX6, + FW_DOMAIN_ID_MEDIA_VDBOX7, FW_DOMAIN_ID_MEDIA_VEBOX0, FW_DOMAIN_ID_MEDIA_VEBOX1, + FW_DOMAIN_ID_MEDIA_VEBOX2, + FW_DOMAIN_ID_MEDIA_VEBOX3, FW_DOMAIN_ID_COUNT }; @@ -66,10 +72,16 @@ enum forcewake_domains { FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1), FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2), FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3), + FORCEWAKE_MEDIA_VDBOX4 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX4), + FORCEWAKE_MEDIA_VDBOX5 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX5), + FORCEWAKE_MEDIA_VDBOX6 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX6), + FORCEWAKE_MEDIA_VDBOX7 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX7), FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0), FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), + FORCEWAKE_MEDIA_VEBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX2), + FORCEWAKE_MEDIA_VEBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX3), - FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1 + FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1, }; struct intel_uncore_funcs { diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 8ef9e6a4ad05..720b60853f8b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,6 +68,7 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, + { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; const i915_reg_t *reg; unsigned int i, j; @@ -103,6 +104,7 @@ int intel_uncore_mock_selftests(void) { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, + { __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true }, }; int err, i; -- cgit v1.2.3 From 816753c06f23773c83763cfef3ded32a387e54e1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Jul 2021 17:25:51 -0700 Subject: drm/i915/gt: nuke gen6_hw_id This is only used by GRAPHICS_VER == 6 and GRAPHICS_VER == 7. All other recent platforms do not depend on this field, so it doesn't make much sense to keep it generic like that. Instead, just do a mapping from engine class to HW ID in the single place that is needed. 
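For background on the approach: the per-class lookup added below uses the driver's existing _PICK() helper, which indexes into a compound-literal array. A minimal sketch of the idea, assuming _PICK() keeps the definition it currently has in i915_reg.h:

    /* Illustration only, not part of this patch. */
    #define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])

    /*
     * RING_FAULT_REG(engine) then selects by (engine)->class: a VCS engine
     * (VIDEO_DECODE_CLASS == 1) resolves to 0x4194, a VECS engine
     * (VIDEO_ENHANCEMENT_CLASS == 2) to 0x4394, and so on.
     */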
v2: use macros with the direct register address instead of calculating from the legacy HW_ID (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210723002551.3906535-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ------ drivers/gpu/drm/i915/gt/intel_engine_types.h | 10 +--------- drivers/gpu/drm/i915/i915_reg.h | 11 ++++++++++- 3 files changed, 11 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 67c61908bc82..43c3fb2aeb13 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -42,7 +42,6 @@ #define MAX_MMIO_BASES 3 struct engine_info { - u8 gen6_hw_id; u8 class; u8 instance; /* mmio bases table *must* be sorted in reverse graphics_ver order */ @@ -54,7 +53,6 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS0] = { - .gen6_hw_id = RCS0_HW, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { @@ -62,7 +60,6 @@ static const struct engine_info intel_engines[] = { }, }, [BCS0] = { - .gen6_hw_id = BCS0_HW, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { @@ -70,7 +67,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS0] = { - .gen6_hw_id = VCS0_HW, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -130,7 +126,6 @@ static const struct engine_info intel_engines[] = { }, }, [VECS0] = { - .gen6_hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -334,7 +329,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; - engine->gen6_hw_id = info->gen6_hw_id; guc_class = engine_class_to_guc_class(info->class); engine->guc_id = MAKE_GUC_ID(guc_class, info->instance); engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8f1f2f12d6f5..3f308a920b50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -28,14 +28,7 @@ #include "intel_wakeref.h" #include "intel_workarounds_types.h" -/* Legacy HW Engine ID */ - -#define RCS0_HW 0 -#define VCS0_HW 1 -#define BCS0_HW 2 -#define VECS0_HW 3 - -/* Gen11+ HW Engine class + instance */ +/* HW Engine class + instance */ #define RENDER_CLASS 0 #define VIDEO_DECODE_CLASS 1 #define VIDEO_ENHANCEMENT_CLASS 2 @@ -274,7 +267,6 @@ struct intel_engine_cs { intel_engine_mask_t mask; - u8 gen6_hw_id; u8 class; u8 instance; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index be945dc74d6f..314194b419f0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2586,7 +2586,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) #define ARB_MODE_SWIZZLE_BDW (1 << 1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100 * (engine)->gen6_hw_id) + +#define _RING_FAULT_REG_RCS 0x4094 +#define _RING_FAULT_REG_VCS 0x4194 +#define _RING_FAULT_REG_BCS 0x4294 +#define _RING_FAULT_REG_VECS 0x4394 +#define RING_FAULT_REG(engine) _MMIO(_PICK((engine)->class, \ + _RING_FAULT_REG_RCS, \ + _RING_FAULT_REG_VCS, \ + _RING_FAULT_REG_VECS, \ + _RING_FAULT_REG_BCS)) #define GEN8_RING_FAULT_REG _MMIO(0x4094) #define 
GEN12_RING_FAULT_REG _MMIO(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) -- cgit v1.2.3 From f3170ba8c907e9936822ad9e0b2eee8e2281615b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Jul 2021 12:21:35 -0500 Subject: drm/i915/gem: Check object_can_migrate from object_migrate We don't roll them together entirely because there are still a couple cases where we want a separate can_migrate check. For instance, the display code checks that you can migrate a buffer to LMEM before it accepts it in fb_create. The dma-buf import code also uses it to do an early check and return a different error code if someone tries to attach a LMEM-only dma-buf to another driver. However, no one actually wants to call object_migrate when can_migrate has failed. The stated intention is for self-tests but none of those actually take advantage of this unsafe migration. Signed-off-by: Jason Ekstrand Cc: Daniel Vetter Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-2-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 13 ++----------- drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 15 --------------- 2 files changed, 2 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 5c21cff33199..d09bd9bdb38a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -584,12 +584,6 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, * completed yet, and to accomplish that, i915_gem_object_wait_migration() * must be called. * - * This function is a bit more permissive than i915_gem_object_can_migrate() - * to allow for migrating objects where the caller knows exactly what is - * happening. For example within selftests. More specifically this - * function allows migrating I915_BO_ALLOC_USER objects to regions - * that are not in the list of allowable regions. - * * Note: the @ww parameter is not used yet, but included to make sure * callers put some effort into obtaining a valid ww ctx if one is * available. 
@@ -616,11 +610,8 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj, if (obj->mm.region == mr) return 0; - if (!i915_gem_object_evictable(obj)) - return -EBUSY; - - if (!obj->ops->migrate) - return -EOPNOTSUPP; + if (!i915_gem_object_can_migrate(obj, id)) + return -EINVAL; return obj->ops->migrate(obj, mr); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 0b7144d2991c..28a700f08b49 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -61,11 +61,6 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, if (err) continue; - if (!i915_gem_object_can_migrate(obj, dst)) { - err = -EINVAL; - continue; - } - err = i915_gem_object_migrate(obj, &ww, dst); if (err) continue; @@ -114,11 +109,6 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, return err; if (i915_gem_object_is_lmem(obj)) { - if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) { - pr_err("object can't migrate to smem.\n"); - return -EINVAL; - } - err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { pr_err("Object failed migration to smem\n"); @@ -137,11 +127,6 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, } } else { - if (!i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { - pr_err("object can't migrate to lmem.\n"); - return -EINVAL; - } - err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM); if (err) { pr_err("Object failed migration to lmem\n"); -- cgit v1.2.3 From 34c7ef0a375c7ccd56755b7c3c5e7874f8e49428 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Jul 2021 12:21:36 -0500 Subject: drm/i915/gem: Refactor placement setup for i915_gem_object_create* (v2) Since we don't allow changing the set of regions after creation, we can make ext_set_placements() build up the region set directly in the create_ext and assign it to the object later. This is similar to what we did for contexts with the proto-context only simpler because there's no funny object shuffling. This will be used in the next patch to allow us to de-duplicate a bunch of code. Also, since we know the maximum number of regions up-front, we can use a fixed-size temporary array for the regions. This simplifies memory management a bit for this new delayed approach. 
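The shape of the change, as a simplified sketch (names match the patch below, but this is not the literal code):

    struct create_ext {
            struct drm_i915_private *i915;
            /* fixed-size scratch: one slot per possible memory region */
            struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
            unsigned int n_placements;
    };

    /*
     * ext_set_placements() now only fills ext_data->placements[] and
     * n_placements; i915_gem_create_ext_ioctl() commits the set to the
     * object with object_set_placements() once all extensions are parsed.
     */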
v2 (Matthew Auld): - Get rid of MAX_N_PLACEMENTS - Drop kfree(placements) from set_placements() v3 (Matthew Auld): - Properly set ext_data->n_placements Signed-off-by: Jason Ekstrand Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-3-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 82 +++++++++++++++++------------- 1 file changed, 46 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 51f92e4b1a69..aa687b10dcd4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -27,10 +27,13 @@ static u32 object_max_page_size(struct drm_i915_gem_object *obj) return max_page_size; } -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) +static int object_set_placements(struct drm_i915_gem_object *obj, + struct intel_memory_region **placements, + unsigned int n_placements) { + struct intel_memory_region **arr; + unsigned int i; + GEM_BUG_ON(!n_placements); /* @@ -44,9 +47,20 @@ static void object_set_placements(struct drm_i915_gem_object *obj, obj->mm.placements = &i915->mm.regions[mr->id]; obj->mm.n_placements = 1; } else { - obj->mm.placements = placements; + arr = kmalloc_array(n_placements, + sizeof(struct intel_memory_region *), + GFP_KERNEL); + if (!arr) + return -ENOMEM; + + for (i = 0; i < n_placements; i++) + arr[i] = placements[i]; + + obj->mm.placements = arr; obj->mm.n_placements = n_placements; } + + return 0; } static int i915_gem_publish(struct drm_i915_gem_object *obj, @@ -148,7 +162,9 @@ i915_gem_dumb_create(struct drm_file *file, return -ENOMEM; mr = intel_memory_region_by_type(to_i915(dev), mem_type); - object_set_placements(obj, &mr, 1); + ret = object_set_placements(obj, &mr, 1); + if (ret) + goto object_free; ret = i915_gem_setup(obj, args->size); if (ret) @@ -184,7 +200,9 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOMEM; mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - object_set_placements(obj, &mr, 1); + ret = object_set_placements(obj, &mr, 1); + if (ret) + goto object_free; ret = i915_gem_setup(obj, args->size); if (ret) @@ -199,7 +217,8 @@ object_free: struct create_ext { struct drm_i915_private *i915; - struct drm_i915_gem_object *vanilla_object; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; + unsigned int n_placements; }; static void repr_placements(char *buf, size_t size, @@ -230,8 +249,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, struct drm_i915_private *i915 = ext_data->i915; struct drm_i915_gem_memory_class_instance __user *uregions = u64_to_user_ptr(args->regions); - struct drm_i915_gem_object *obj = ext_data->vanilla_object; - struct intel_memory_region **placements; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; u32 mask; int i, ret = 0; @@ -245,6 +263,8 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ret = -EINVAL; } + BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements)); + BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != ARRAY_SIZE(placements)); if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) { drm_dbg(&i915->drm, "num_regions is too large\n"); ret = -EINVAL; @@ -253,21 +273,13 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions 
*args, if (ret) return ret; - placements = kmalloc_array(args->num_regions, - sizeof(struct intel_memory_region *), - GFP_KERNEL); - if (!placements) - return -ENOMEM; - mask = 0; for (i = 0; i < args->num_regions; i++) { struct drm_i915_gem_memory_class_instance region; struct intel_memory_region *mr; - if (copy_from_user(®ion, uregions, sizeof(region))) { - ret = -EFAULT; - goto out_free; - } + if (copy_from_user(®ion, uregions, sizeof(region))) + return -EFAULT; mr = intel_memory_region_lookup(i915, region.memory_class, @@ -293,14 +305,14 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ++uregions; } - if (obj->mm.placements) { + if (ext_data->n_placements) { ret = -EINVAL; goto out_dump; } - object_set_placements(obj, placements, args->num_regions); - if (args->num_regions == 1) - kfree(placements); + ext_data->n_placements = args->num_regions; + for (i = 0; i < args->num_regions; i++) + ext_data->placements[i] = placements[i]; return 0; @@ -308,11 +320,11 @@ out_dump: if (1) { char buf[256]; - if (obj->mm.placements) { + if (ext_data->n_placements) { repr_placements(buf, sizeof(buf), - obj->mm.placements, - obj->mm.n_placements); + ext_data->placements, + ext_data->n_placements); drm_dbg(&i915->drm, "Placements were already set in previous EXT. Existing placements: %s\n", buf); @@ -322,8 +334,6 @@ out_dump: drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf); } -out_free: - kfree(placements); return ret; } @@ -358,7 +368,6 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create_ext *args = data; struct create_ext ext_data = { .i915 = i915 }; - struct intel_memory_region **placements_ext; struct drm_i915_gem_object *obj; int ret; @@ -371,21 +380,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOMEM; - ext_data.vanilla_object = obj; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), &ext_data); - placements_ext = obj->mm.placements; if (ret) goto object_free; - if (!placements_ext) { - struct intel_memory_region *mr = + if (!ext_data.n_placements) { + ext_data.placements[0] = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - - object_set_placements(obj, &mr, 1); + ext_data.n_placements = 1; } + ret = object_set_placements(obj, ext_data.placements, + ext_data.n_placements); + if (ret) + goto object_free; ret = i915_gem_setup(obj, args->size); if (ret) @@ -395,7 +405,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, object_free: if (obj->mm.n_placements > 1) - kfree(placements_ext); + kfree(obj->mm.placements); i915_gem_object_free(obj); return ret; } -- cgit v1.2.3 From 82ec88e11d46e3d981e2db854fa5ab037c2c0f1f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Jul 2021 12:21:37 -0500 Subject: drm/i915/gem: Call i915_gem_flush_free_objects() in i915_gem_dumb_create() This doesn't really fix anything serious since the chances of a client creating and destroying a mass of dumb BOs is pretty low. However, it is called by the other two create IOCTLs to garbage collect old objects. Call it here too for consistency. 
Signed-off-by: Jason Ekstrand Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-4-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index aa687b10dcd4..adcce37c04b8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -151,6 +151,8 @@ i915_gem_dumb_create(struct drm_file *file, if (args->pitch < args->width) return -EINVAL; + i915_gem_flush_free_objects(i915); + args->size = mul_u32_u32(args->pitch, args->height); mem_type = INTEL_MEMORY_SYSTEM; -- cgit v1.2.3 From bf947c989c1642d89062a03121f30760dca10c06 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Jul 2021 12:21:38 -0500 Subject: drm/i915/gem: Unify user object creation (v3) Instead of hand-rolling the same three calls in each function, pull them into an i915_gem_object_create_user helper. Apart from re-ordering of the placements array ENOMEM check, there should be no functional change. v2 (Matthew Auld): - Add the call to i915_gem_flush_free_objects() from i915_gem_dumb_create() in a separate patch - Move i915_gem_object_alloc() below the simple error checks v3 (Matthew Auld): - Add __ to i915_gem_object_create_user and kerneldoc which warns the caller that it's not validating anything. Signed-off-by: Jason Ekstrand Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-5-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 119 +++++++++++++---------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 4 + 2 files changed, 58 insertions(+), 65 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index adcce37c04b8..23fee13a3384 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -11,13 +11,14 @@ #include "i915_trace.h" #include "i915_user_extensions.h" -static u32 object_max_page_size(struct drm_i915_gem_object *obj) +static u32 object_max_page_size(struct intel_memory_region **placements, + unsigned int n_placements) { u32 max_page_size = 0; int i; - for (i = 0; i < obj->mm.n_placements; i++) { - struct intel_memory_region *mr = obj->mm.placements[i]; + for (i = 0; i < n_placements; i++) { + struct intel_memory_region *mr = placements[i]; GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); max_page_size = max_t(u32, max_page_size, mr->min_page_size); @@ -81,22 +82,46 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) +/** + * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT + * @i915: i915 private + * @size: size of the buffer, in bytes + * @placements: possible placement regions, in priority order + * @n_placements: number of possible placement regions + * + * This function is exposed primarily for selftests and does very little + * error checking. It is assumed that the set of placement regions has + * already been verified to be valid. 
+ */ +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements) { - struct intel_memory_region *mr = obj->mm.placements[0]; + struct intel_memory_region *mr = placements[0]; + struct drm_i915_gem_object *obj; unsigned int flags; int ret; - size = round_up(size, object_max_page_size(obj)); + i915_gem_flush_free_objects(i915); + + size = round_up(size, object_max_page_size(placements, n_placements)); if (size == 0) - return -EINVAL; + return ERR_PTR(-EINVAL); /* For most of the ABI (e.g. mmap) we think in system pages */ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); if (i915_gem_object_size_2big(size)) - return -E2BIG; + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + ret = object_set_placements(obj, placements, n_placements); + if (ret) + goto object_free; /* * I915_BO_ALLOC_USER will make sure the object is cleared before @@ -106,12 +131,18 @@ i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) ret = mr->ops->init_object(mr, obj, size, 0, flags); if (ret) - return ret; + goto object_free; GEM_BUG_ON(size != obj->base.size); trace_i915_gem_object_create(obj); - return 0; + return obj; + +object_free: + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + i915_gem_object_free(obj); + return ERR_PTR(ret); } int @@ -124,7 +155,6 @@ i915_gem_dumb_create(struct drm_file *file, enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; - int ret; switch (cpp) { case 1: @@ -151,32 +181,19 @@ i915_gem_dumb_create(struct drm_file *file, if (args->pitch < args->width) return -EINVAL; - i915_gem_flush_free_objects(i915); - args->size = mul_u32_u32(args->pitch, args->height); mem_type = INTEL_MEMORY_SYSTEM; if (HAS_LMEM(to_i915(dev))) mem_type = INTEL_MEMORY_LOCAL; - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - mr = intel_memory_region_by_type(to_i915(dev), mem_type); - ret = object_set_placements(obj, &mr, 1); - if (ret) - goto object_free; - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(to_i915(dev), args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } /** @@ -193,28 +210,14 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; struct drm_i915_gem_object *obj; struct intel_memory_region *mr; - int ret; - - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - ret = object_set_placements(obj, &mr, 1); - if (ret) - goto object_free; - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } struct create_ext { @@ -376,38 +379,24 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, if (args->flags) return -EINVAL; - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), &ext_data); if (ret) - goto object_free; + return ret; if 
(!ext_data.n_placements) { ext_data.placements[0] = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); ext_data.n_placements = 1; } - ret = object_set_placements(obj, ext_data.placements, - ext_data.n_placements); - if (ret) - goto object_free; - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, + ext_data.placements, + ext_data.n_placements); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - if (obj->mm.n_placements > 1) - kfree(obj->mm.placements); - i915_gem_object_free(obj); - return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f3ede43282dc..0896ac532f5e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -61,6 +61,10 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, const void *data, resource_size_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; -- cgit v1.2.3 From 75e382850b7ea516cbeaecf2dd22dd040e144ad9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Jul 2021 12:21:39 -0500 Subject: drm/i915/gem/ttm: Only call __i915_gem_object_set_pages if needed __i915_ttm_get_pages does two things. First, it calls ttm_bo_validate() to check the given placement and migrate the BO if needed. Then, it updates the GEM object to match, in case the object was migrated. If no migration occured, however, we might still have pages on the GEM object in which case we don't need to fetch them from TTM and call __i915_gem_object_set_pages. This hasn't been a problem before because the primary user of __i915_ttm_get_pages is __i915_gem_object_get_pages which only calls it if the GEM object doesn't have pages. However, i915_ttm_migrate also uses __i915_ttm_get_pages to do the migration so this meant it was unsafe to call on an already populated object. This patch checks i915_gem_object_has_pages() before trying to __i915_gem_object_set_pages so i915_ttm_migrate is safe to call, even on populated objects. Signed-off-by: Jason Ekstrand Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-6-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index f253b11e9e36..771eb2963123 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -662,13 +662,14 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, i915_ttm_adjust_gem_after_move(obj); } - GEM_WARN_ON(obj->mm.pages); - /* Object either has a page vector or is an iomem object */ - st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; - if (IS_ERR(st)) - return PTR_ERR(st); + if (!i915_gem_object_has_pages(obj)) { + /* Object either has a page vector or is an iomem object */ + st = bo->ttm ? 
i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; + if (IS_ERR(st)) + return PTR_ERR(st); - __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + } return ret; } -- cgit v1.2.3 From 76b62448dc8ffc3cfcfc40603d965b710a1840e3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 23 Jul 2021 12:21:40 -0500 Subject: drm/i915/gem: Always call obj->ops->migrate unless can_migrate fails Without TTM, we have no such hook so we exit early but this is fine because we use TTM on all LMEM platforms and, on integrated platforms, there is no real migration. If we do have the hook, it's better to just let TTM handle the migration because it knows where things are actually placed. This fixes a bug where i915_gem_object_migrate fails to migrate newly created LMEM objects. In that scenario, the object has obj->mm.region set to LMEM but TTM has it in SMEM because that's where all new objects are placed there prior to getting actual pages. When we invoke i915_gem_object_migrate, it exits early because, from the point of view of the GEM object, it's already in LMEM and no migration is needed. Then, when we try to pin the pages, __i915_ttm_get_pages is called which, unaware of our failed attempt at a migration, places the object in SMEM. This only happens on newly created objects because they have this weird state where TTM thinks they're in SMEM, GEM thinks they're in LMEM, and the reality is that they don't exist at all. It's better if GEM just always calls into TTM and let's TTM handle things. That way the lies stay better contained. Once the migration is complete, the object will have pages, obj->mm.region will be correct, and we're done lying. Signed-off-by: Jason Ekstrand Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-7-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d09bd9bdb38a..9d3497e1235a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -607,12 +607,15 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj, mr = i915->mm.regions[id]; GEM_BUG_ON(!mr); - if (obj->mm.region == mr) - return 0; - if (!i915_gem_object_can_migrate(obj, id)) return -EINVAL; + if (!obj->ops->migrate) { + if (GEM_WARN_ON(obj->mm.region != mr)) + return -EINVAL; + return 0; + } + return obj->ops->migrate(obj, mr); } -- cgit v1.2.3 From d7b2cb380b3a67894dccf497ded604f52bc05346 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 23 Jul 2021 12:21:41 -0500 Subject: drm/i915/gem: Correct the locking and pin pattern for dma-buf (v8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If our exported dma-bufs are imported by another instance of our driver, that instance will typically have the imported dma-bufs locked during dma_buf_map_attachment(). But the exporter also locks the same reservation object in the map_dma_buf() callback, which leads to recursive locking. So taking the lock inside _pin_pages_unlocked() is incorrect. Additionally, the current pinning code path is contrary to the defined way that pinning should occur. 
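To make the recursion concrete, a rough sketch of the call chain when one i915 instance imports a dma-buf exported by another (simplified; the lock in question is the shared reservation object):

    /* importer side (this driver), called with the object lock held */
    i915_gem_object_get_pages_dmabuf(obj)
        -> dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL)
            /* exporter side (also this driver) */
            -> i915_gem_map_dma_buf(attach, dir)
                -> i915_gem_object_pin_pages_unlocked(exported_obj)
                    /* attempts to take the same reservation lock again */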
Remove the explicit pin/unpin from the map/umap functions and move them to the attach/detach allowing correct locking to occur, and to match the static dma-buf drm_prime pattern. Add a live selftest to exercise both dynamic and non-dynamic exports. v2: - Extend the selftest with a fake dynamic importer. - Provide real pin and unpin callbacks to not abuse the interface. v3: (ruhl) - Remove the dynamic export support and move the pinning into the attach/detach path. v4: (ruhl) - Put pages does not need to assert on the dma-resv v5: (jason) - Lock around dma_buf_unmap_attachment() when emulating a dynamic importer in the subtests. - Use pin_pages_unlocked v6: (jason) - Use dma_buf_attach instead of dma_buf_attach_dynamic in the selftests v7: (mauld) - Use __i915_gem_object_get_pages (2 __underscores) instead of the 4 ____underscore version in the selftests v8: (mauld) - Drop the kernel doc from the static i915_gem_dmabuf_attach function - Add missing "err = PTR_ERR()" to a bunch of selftest error cases Reported-by: Michael J. Ruhl Signed-off-by: Thomas Hellström Signed-off-by: Michael J. Ruhl Signed-off-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-8-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 37 ++++--- .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 109 ++++++++++++++++++++- 2 files changed, 132 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 616c3a2f1baf..59dc56ae14d6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -12,6 +12,8 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" +I915_SELFTEST_DECLARE(static bool force_different_devices;) + static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { return to_intel_bo(buf->priv); @@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages_unlocked(obj); - if (ret) - goto err; - /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { ret = -ENOMEM; - goto err_unpin_pages; + goto err; } ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); @@ -58,8 +56,6 @@ err_free_sg: sg_free_table(st); err_free: kfree(st); -err_unpin_pages: - i915_gem_object_unpin_pages(obj); err: return ERR_PTR(ret); } @@ -68,13 +64,9 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); - - i915_gem_object_unpin_pages(obj); } static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map) @@ -168,7 +160,25 @@ retry: return err; } +static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + + return i915_gem_object_pin_pages_unlocked(obj); +} + +static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + + i915_gem_object_unpin_pages(obj); +} + static const struct dma_buf_ops i915_dmabuf_ops = { + 
.attach = i915_gem_dmabuf_attach, + .detach = i915_gem_dmabuf_detach, .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, @@ -204,6 +214,8 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) struct sg_table *pages; unsigned int sg_page_sizes; + assert_object_held(obj); + pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) @@ -241,7 +253,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, if (dma_buf->ops == &i915_dmabuf_ops) { obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ - if (obj->base.dev == dev) { + if (obj->base.dev == dev && + !I915_SELFTEST_ONLY(force_different_devices)) { /* * Importing dmabuf exported from out own gem increases * refcount on gem itself instead of f_count of dmabuf. diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index dd74bc09ec88..d4ce01e6ee85 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -35,7 +35,7 @@ static int igt_dmabuf_export(void *arg) static int igt_dmabuf_import_self(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj, *import_obj; struct drm_gem_object *import; struct dma_buf *dmabuf; int err; @@ -65,14 +65,118 @@ static int igt_dmabuf_import_self(void *arg) err = -EINVAL; goto out_import; } + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + i915_gem_object_unlock(import_obj); + if (err) { + pr_err("Same object dma-buf get_pages failed!\n"); + goto out_import; + } err = 0; out_import: - i915_gem_object_put(to_intel_bo(import)); + i915_gem_object_put(import_obj); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import_same_driver(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *import_obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct dma_buf_attachment *import_attach; + struct sg_table *st; + long timeout; + int err; + + force_different_devices = true; + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import == &obj->base) { + pr_err("i915_gem_prime_import reused gem object!\n"); + err = -EINVAL; + goto out_import; + } + + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + if (err) { + pr_err("Different objects dma-buf get_pages failed!\n"); + i915_gem_object_unlock(import_obj); + goto out_import; + } + + /* + * If the exported object is not in system memory, something + * weird is going on. TODO: When p2p is supported, this is no + * longer considered weird. 
+ */ + if (obj->mm.region != i915->mm.regions[INTEL_REGION_SMEM]) { + pr_err("Exported dma-buf is not in system memory\n"); + err = -EINVAL; + } + + i915_gem_object_unlock(import_obj); + + /* Now try a fake an importer */ + import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev); + if (IS_ERR(import_attach)) { + err = PTR_ERR(import_attach); + goto out_import; + } + + st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(st)) { + err = PTR_ERR(st); + goto out_detach; + } + + timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + if (!timeout) { + pr_err("dmabuf wait for exclusive fence timed out.\n"); + timeout = -ETIME; + } + err = timeout > 0 ? 0 : timeout; + dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL); +out_detach: + dma_buf_detach(dmabuf, import_attach); +out_import: + i915_gem_object_put(import_obj); out_dmabuf: dma_buf_put(dmabuf); out: i915_gem_object_put(obj); +out_ret: + force_different_devices = false; return err; } @@ -286,6 +390,7 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_same_driver), }; return i915_subtests(tests, i915); -- cgit v1.2.3 From cdb35d1ed6d216978521b75927acb3b8c50a6cac Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 23 Jul 2021 12:21:42 -0500 Subject: drm/i915/gem: Migrate to system at dma-buf attach time (v7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until we support p2p dma or as a complement to that, migrate data to system memory at dma-buf attach time if possible. v2: - Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver selftest to migrate if we are LMEM capable. v3: - Migrate also in the pin() callback. v4: - Migrate in attach v5: (jason) - Lock around the migration v6: (jason) - Move the can_migrate check outside the lock - Rework the selftests to test more migration conditions. In particular, SMEM, LMEM, and LMEM+SMEM are all checked. v7: (mauld) - Misc style nits Signed-off-by: Thomas Hellström Signed-off-by: Michael J. 
Ruhl Reported-by: kernel test robot Signed-off-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210723172142.3273510-9-jason@jlekstrand.net --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 23 +++++- .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 87 +++++++++++++++++++++- 2 files changed, 106 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 59dc56ae14d6..afa34111de02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -164,8 +164,29 @@ static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + struct i915_gem_ww_ctx ww; + int err; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) + return -EOPNOTSUPP; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM); + if (err) + continue; - return i915_gem_object_pin_pages_unlocked(obj); + err = i915_gem_object_wait_migration(obj, 0); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + } + + return err; } static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index d4ce01e6ee85..ffae7df5e4d7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -85,9 +85,63 @@ out: return err; } -static int igt_dmabuf_import_same_driver(void *arg) +static int igt_dmabuf_import_same_driver_lmem(void *arg) { struct drm_i915_private *i915 = arg; + struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM]; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + if (!lmem) + return 0; + + force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + /* + * We expect an import of an LMEM-only object to fail with + * -EOPNOTSUPP because it can't be migrated to SMEM. 
+ */ + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (!IS_ERR(import)) { + drm_gem_object_put(import); + pr_err("i915_gem_prime_import succeeded when it shouldn't have\n"); + err = -EINVAL; + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + } + + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, + struct intel_memory_region **regions, + unsigned int num_regions) +{ struct drm_i915_gem_object *obj, *import_obj; struct drm_gem_object *import; struct dma_buf *dmabuf; @@ -97,8 +151,12 @@ static int igt_dmabuf_import_same_driver(void *arg) int err; force_different_devices = true; - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + regions, num_regions); if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(dmabuf)); err = PTR_ERR(obj); goto out_ret; } @@ -180,6 +238,27 @@ out_ret: return err; } +static int igt_dmabuf_import_same_driver_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM]; + + return igt_dmabuf_import_same_driver(i915, &smem, 1); +} + +static int igt_dmabuf_import_same_driver_lmem_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *regions[2]; + + if (!i915->mm.regions[INTEL_REGION_LMEM]) + return 0; + + regions[0] = i915->mm.regions[INTEL_REGION_LMEM]; + regions[1] = i915->mm.regions[INTEL_REGION_SMEM]; + return igt_dmabuf_import_same_driver(i915, regions, 2); +} + static int igt_dmabuf_import(void *arg) { struct drm_i915_private *i915 = arg; @@ -390,7 +469,9 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_dmabuf_export), - SUBTEST(igt_dmabuf_import_same_driver), + SUBTEST(igt_dmabuf_import_same_driver_lmem), + SUBTEST(igt_dmabuf_import_same_driver_smem), + SUBTEST(igt_dmabuf_import_same_driver_lmem_smem), }; return i915_subtests(tests, i915); -- cgit v1.2.3 From 3821cc7fc0b90a55c3708628336a97993e464dba Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 23 Jul 2021 11:50:44 +0100 Subject: drm/i915: document caching related bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to document the object caching related bits, like cache_coherent and cache_dirty. v2(Ville): - As pointed out by Ville, fix the completely incorrect assumptions about the "partial" coherency on shared LLC platforms. v3(Daniel): - Fix nonsense about "dirtying" the cache with reads. v4(Daniel): - Various improvements, including adding some more details for WT. 
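For orientation while reading the new kerneldoc, a condensed view of how the two tracking fields interact (illustrative only, not code added by this patch):

    if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) {
            /* GPU sees CPU writes; also implies coherent for reads */
    } else if (obj->cache_dirty) {
            /* CPU writes may still be sitting in the cache: clflush
             * before the GPU (or display engine) reads the pages */
    }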
Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Ville Syrjälä Cc: Mika Kuoppala Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210723105045.400841-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 187 ++++++++++++++++++++++- drivers/gpu/drm/i915/i915_drv.h | 9 -- 2 files changed, 183 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index afbadfc5516b..79de925aecfd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -92,6 +92,86 @@ struct drm_i915_gem_object_ops { const char *name; /* friendly name for debug, e.g. lockdep classes */ }; +/** + * enum i915_cache_level - The supported GTT caching values for system memory + * pages. + * + * These translate to some special GTT PTE bits when binding pages into some + * address space. It also determines whether an object, or rather its pages are + * coherent with the GPU, when also reading or writing through the CPU cache + * with those pages. + * + * Userspace can also control this through struct drm_i915_gem_caching. + */ +enum i915_cache_level { + /** + * @I915_CACHE_NONE: + * + * GPU access is not coherent with the CPU cache. If the cache is dirty + * and we need the underlying pages to be coherent with some later GPU + * access then we need to manually flush the pages. + * + * On shared LLC platforms reads and writes through the CPU cache are + * still coherent even with this setting. See also + * &drm_i915_gem_object.cache_coherent for more details. Due to this we + * should only ever use uncached for scanout surfaces, otherwise we end + * up over-flushing in some places. + * + * This is the default on non-LLC platforms. + */ + I915_CACHE_NONE = 0, + /** + * @I915_CACHE_LLC: + * + * GPU access is coherent with the CPU cache. If the cache is dirty, + * then the GPU will ensure that access remains coherent, when both + * reading and writing through the CPU cache. GPU writes can dirty the + * CPU cache. + * + * Not used for scanout surfaces. + * + * Applies to both platforms with shared LLC(HAS_LLC), and snooping + * based platforms(HAS_SNOOP). + * + * This is the default on shared LLC platforms. The only exception is + * scanout objects, where the display engine is not coherent with the + * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is + * automatically applied by the kernel in pin_for_display, if userspace + * has not done so already. + */ + I915_CACHE_LLC, + /** + * @I915_CACHE_L3_LLC: + * + * Explicitly enable the Gfx L3 cache, with coherent LLC. + * + * The Gfx L3 sits between the domain specific caches, e.g + * sampler/render caches, and the larger LLC. LLC is coherent with the + * GPU, but L3 is only visible to the GPU, so likely needs to be flushed + * when the workload completes. + * + * Not used for scanout surfaces. + * + * Only exposed on some gen7 + GGTT. More recent hardware has dropped + * this explicit setting, where it should now be enabled by default. + */ + I915_CACHE_L3_LLC, + /** + * @I915_CACHE_WT: + * + * Write-through. Used for scanout surfaces. + * + * The GPU can utilise the caches, while still having the display engine + * be coherent with GPU writes, as a result we don't need to flush the + * CPU caches when moving out of the render domain. 
This is the default + * setting chosen by the kernel, if supported by the HW, otherwise we + * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU + * cache still need to be flushed, to remain coherent with the display + * engine. + */ + I915_CACHE_WT, +}; + enum i915_map_type { I915_MAP_WB = 0, I915_MAP_WC, @@ -229,14 +309,113 @@ struct drm_i915_gem_object { unsigned int mem_flags; #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ #define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit + /** + * @cache_level: The desired GTT caching level. + * + * See enum i915_cache_level for possible values, along with what + * each does. */ unsigned int cache_level:3; - unsigned int cache_coherent:2; + /** + * @cache_coherent: + * + * Track whether the pages are coherent with the GPU if reading or + * writing through the CPU caches. The largely depends on the + * @cache_level setting. + * + * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom + * platforms, coherency must be explicitly requested with some special + * GTT caching bits(see enum i915_cache_level). When enabling coherency + * it does come at a performance and power cost on such platforms. On + * the flip side the kernel does not need to manually flush any buffers + * which need to be coherent with the GPU, if the object is not coherent + * i.e @cache_coherent is zero. + * + * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory + * access will automatically snoop the CPU caches(even with CACHE_NONE). + * The one exception is when dealing with the display engine, like with + * scanout surfaces. To handle this the kernel will always flush the + * surface out of the CPU caches when preparing it for scanout. Also + * note that since scanout surfaces are only ever read by the display + * engine we only need to care about flushing any writes through the CPU + * cache, reads on the other hand will always be coherent. + * + * Something strange here is why @cache_coherent is not a simple + * boolean, i.e coherent vs non-coherent. The reasoning for this is back + * to the display engine not being fully coherent. As a result scanout + * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT. + * In the case of seeing I915_CACHE_NONE the kernel makes the assumption + * that this is likely a scanout surface, and will set @cache_coherent + * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared + * LLC. The kernel uses this to always flush writes through the CPU + * cache as early as possible, where it can, in effect keeping + * @cache_dirty clean, so we can potentially avoid stalling when + * flushing the surface just before doing the scanout. This does mean + * we might unnecessarily flush non-scanout objects in some places, but + * the default assumption is that all normal objects should be using + * I915_CACHE_LLC, at least on platforms with the shared LLC. + * + * Supported values: + * + * I915_BO_CACHE_COHERENT_FOR_READ: + * + * On shared LLC platforms, we use this for special scanout surfaces, + * where the display engine is not coherent with the CPU cache. As such + * we need to ensure we flush any writes before doing the scanout. As an + * optimisation we try to flush any writes as early as possible to avoid + * stalling later. 
+ * + * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC + * platforms, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ + * + * While for normal objects that are fully coherent, including special + * scanout surfaces marked as I915_CACHE_WT, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ | + * I915_BO_CACHE_COHERENT_FOR_WRITE + * + * And then for objects that are not coherent at all we use: + * + * cache_coherent = 0 + * + * I915_BO_CACHE_COHERENT_FOR_WRITE: + * + * When writing through the CPU cache, the GPU is still coherent. Note + * that this also implies I915_BO_CACHE_COHERENT_FOR_READ. + */ #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) + unsigned int cache_coherent:2; + + /** + * @cache_dirty: + * + * Track if we are we dirty with writes through the CPU cache for this + * object. As a result reading directly from main memory might yield + * stale data. + * + * This also ties into whether the kernel is tracking the object as + * coherent with the GPU, as per @cache_coherent, as it determines if + * flushing might be needed at various points. + * + * Another part of @cache_dirty is managing flushing when first + * acquiring the pages for system memory, at this point the pages are + * considered foreign, so the default assumption is that the cache is + * dirty, for example the page zeroing done by the kernel might leave + * writes though the CPU cache, or swapping-in, while the actual data in + * main memory is potentially stale. Note that this is a potential + * security issue when dealing with userspace objects and zeroing. Now, + * whether we actually need apply the big sledgehammer of flushing all + * the pages on acquire depends on if @cache_coherent is marked as + * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent + * for both reads and writes though the CPU cache. + * + * Note that on shared LLC platforms we still apply the heavy flush for + * I915_CACHE_NONE objects, under the assumption that this is going to + * be used for scanout. + */ unsigned int cache_dirty:1; /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b83389c06550..5c82a8012332 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -392,15 +392,6 @@ struct drm_i915_display_funcs { void (*read_luts)(struct intel_crtc_state *crtc_state); }; -enum i915_cache_level { - I915_CACHE_NONE = 0, - I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ - I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc - caches, eg sampler/render caches, and the - large Last-Level-Cache. LLC is coherent with - the CPU, but L3 is only visible to the GPU. */ - I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ -}; #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */ -- cgit v1.2.3 From 13d29c823738c0bcc72a631b8e6c3c5cb89090d6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 23 Jul 2021 11:50:45 +0100 Subject: drm/i915/ehl: unconditionally flush the pages on acquire EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it possible for userspace to bypass the GTT caching bits set by the kernel, as per the given object cache_level. This is troublesome since the heavy flush we apply when first acquiring the pages is skipped if the kernel thinks the object is coherent with the GPU. 
As a result it might be possible to bypass the cache and read the contents of the page directly, which could be stale data. If it's just a case of userspace shooting themselves in the foot then so be it, but since i915 takes the stance of always zeroing memory before handing it to userspace, we need to prevent this. v2: this time actually set cache_dirty in put_pages() v3: move to get_pages() which looks simpler BSpec: 34007 References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"") Signed-off-by: Matthew Auld Cc: Tejas Upadhyay Cc: Francisco Jerez Cc: Lucas De Marchi Cc: Jon Bloomfield Cc: Chris Wilson Cc: Matt Roper Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210723105045.400841-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 6 ++++++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 79de925aecfd..2471f36aaff3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -415,6 +415,12 @@ struct drm_i915_gem_object { * Note that on shared LLC platforms we still apply the heavy flush for * I915_CACHE_NONE objects, under the assumption that this is going to * be used for scanout. + * + * Update: On some hardware there is now also the 'Bypass LLC' MOCS + * entry, which defeats our @cache_coherent tracking, since userspace + * can freely bypass the CPU cache when touching the pages with the GPU, + * where the kernel is completely unaware. On such platform we need + * apply the sledgehammer-on-acquire regardless of the @cache_coherent. */ unsigned int cache_dirty:1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 6a04cce188fc..11f072193f3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -182,6 +182,24 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); + /* + * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it + * possible for userspace to bypass the GTT caching bits set by the + * kernel, as per the given object cache_level. This is troublesome + * since the heavy flush we apply when first gathering the pages is + * skipped if the kernel thinks the object is coherent with the GPU. As + * a result it might be possible to bypass the cache and read the + * contents of the page directly, which could be stale data. If it's + * just a case of userspace shooting themselves in the foot then so be + * it, but since i915 takes the stance of always zeroing memory before + * handing it to userspace, we need to prevent this. + * + * By setting cache_dirty here we make the clflush in set_pages + * unconditional on such platforms. + */ + if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER) + obj->cache_dirty = true; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; -- cgit v1.2.3 From 556120256ecd25aacea2c7e3ad11ec6584de7252 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:16 -0700 Subject: drm/i915/guc: GuC virtual engines Implement GuC virtual engines. 
Rather simple implementation, basically just allocate an engine, setup context enter / exit function to virtual engine specific functions, set all other variables / functions to guc versions, and set the engine mask to that of all the siblings. v2: Update to work with proto-ctx v3: (Daniele) - Drop include, add comment to intel_virtual_engine_has_heartbeat Cc: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 6 + drivers/gpu/drm/i915/gt/intel_engine.h | 30 ++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 ++ .../gpu/drm/i915/gt/intel_execlists_submission.c | 29 ++- .../gpu/drm/i915/gt/intel_execlists_submission.h | 4 - drivers/gpu/drm/i915/gt/selftest_execlists.c | 12 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 242 ++++++++++++++++++++- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h | 2 + 9 files changed, 311 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 89ca401bf9ae..bc52eeed782a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -74,7 +74,6 @@ #include "gt/intel_context_param.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" -#include "gt/intel_execlists_submission.h" /* virtual_engine */ #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" @@ -363,9 +362,6 @@ set_proto_ctx_engines_balance(struct i915_user_extension __user *base, if (!HAS_EXECLISTS(i915)) return -ENODEV; - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - return -ENODEV; /* not implement yet */ - if (get_user(idx, &ext->engine_index)) return -EFAULT; @@ -950,8 +946,8 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, break; case I915_GEM_ENGINE_TYPE_BALANCED: - ce = intel_execlists_create_virtual(pe[n].siblings, - pe[n].num_siblings); + ce = intel_engine_create_virtual(pe[n].siblings, + pe[n].num_siblings); break; case I915_GEM_ENGINE_TYPE_INVALID: diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 4a5518d295c2..542c98418771 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -47,6 +47,12 @@ struct intel_context_ops { void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); + + /* virtual engine/context interface */ + struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, + unsigned int count); + struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, + unsigned int sibling); }; struct intel_context { diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index f911c1224ab2..13bfb7ec33b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -273,13 +273,41 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } +struct intel_context * +intel_engine_create_virtual(struct intel_engine_cs **siblings, + unsigned int count); + +static inline bool +intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) +{ + /* + * For non-GuC submission we expect the back-end to look at the + 
* heartbeat status of the actual physical engine that the work + * has been (or is being) scheduled on, so we should only reach + * here with GuC submission enabled. + */ + GEM_BUG_ON(!intel_engine_uses_guc(engine)); + + return intel_guc_virtual_engine_has_heartbeat(engine); +} + static inline bool intel_engine_has_heartbeat(const struct intel_engine_cs *engine) { if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return false; - return READ_ONCE(engine->props.heartbeat_interval_ms); + if (intel_engine_is_virtual(engine)) + return intel_virtual_engine_has_heartbeat(engine); + else + return READ_ONCE(engine->props.heartbeat_interval_ms); +} + +static inline struct intel_engine_cs * +intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) +{ + GEM_BUG_ON(!intel_engine_is_virtual(engine)); + return engine->cops->get_sibling(engine, sibling); } #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 43c3fb2aeb13..6a7edc839a2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1796,6 +1796,20 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) return total; } +struct intel_context * +intel_engine_create_virtual(struct intel_engine_cs **siblings, + unsigned int count) +{ + if (count == 0) + return ERR_PTR(-EINVAL); + + if (count == 1) + return intel_context_create(siblings[0]); + + GEM_BUG_ON(!siblings[0]->cops->create_virtual); + return siblings[0]->cops->create_virtual(siblings, count); +} + static bool match_ring(struct i915_request *rq) { u32 ring = ENGINE_READ(rq->engine, RING_START); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index be99a74e6e09..1e76768dc5e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -199,6 +199,9 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } +static struct intel_context * +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); + static struct i915_request * __active_request(const struct intel_timeline * const tl, struct i915_request *rq, @@ -2599,6 +2602,8 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + + .create_virtual = execlists_create_virtual, }; static int emit_pdps(struct i915_request *rq) @@ -3549,6 +3554,17 @@ static void virtual_context_exit(struct intel_context *ce) intel_engine_pm_put(ve->siblings[n]); } +static struct intel_engine_cs * +virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + static const struct intel_context_ops virtual_context_ops = { .flags = COPS_HAS_INFLIGHT, @@ -3563,6 +3579,8 @@ static const struct intel_context_ops virtual_context_ops = { .exit = virtual_context_exit, .destroy = virtual_context_destroy, + + .get_sibling = virtual_get_sibling, }; static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) @@ -3711,20 +3729,13 @@ unlock: spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count) +static 
struct intel_context * +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) { struct virtual_engine *ve; unsigned int n; int err; - if (count == 0) - return ERR_PTR(-EINVAL); - - if (count == 1) - return intel_context_create(siblings[0]); - ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); if (!ve) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index ad4f3e1a0fde..a1aa92c983a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -32,10 +32,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, int indent), unsigned int max); -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count); - bool intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 22a124b134b6..f12ffe797639 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ve[n] = intel_execlists_create_virtual(siblings, nsibling); + ve[n] = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve[n])) { err = PTR_ERR(ve[n]); nctx = n; @@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. */ - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt, i915_request_add(rq); } - ce = intel_execlists_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt, /* XXX We do not handle oversubscription and fairness with normal rq */ for (n = 0; n < nsibling; n++) { - ce = intel_execlists_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt, if (err) goto out_scratch; - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt, if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_spin; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 26aadad10b12..8b3ae5f65cd5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -60,6 +60,15 @@ * */ +/* GuC Virtual Engine */ +struct guc_virtual_engine { + struct intel_engine_cs base; + struct intel_context context; +}; + +static struct intel_context * +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); + #define 
GUC_REQUEST_SIZE 64 /* bytes */ /* @@ -931,14 +940,17 @@ static int guc_lrc_desc_pin(struct intel_context *ce) return ret; } -static int guc_context_pre_pin(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - void **vaddr) +static int __guc_context_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr) { - return lrc_pre_pin(ce, ce->engine, ww, vaddr); + return lrc_pre_pin(ce, engine, ww, vaddr); } -static int guc_context_pin(struct intel_context *ce, void *vaddr) +static int __guc_context_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) { if (i915_ggtt_offset(ce->state) != (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) @@ -949,7 +961,19 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) * explaination of why. */ - return lrc_pin(ce, ce->engine, vaddr); + return lrc_pin(ce, engine, vaddr); +} + +static int guc_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); +} + +static int guc_context_pin(struct intel_context *ce, void *vaddr) +{ + return __guc_context_pin(ce, ce->engine, vaddr); } static void guc_context_unpin(struct intel_context *ce) @@ -1054,6 +1078,21 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) deregister_context(ce, ce->guc_id); } +static void __guc_context_destroy(struct intel_context *ce) +{ + lrc_fini(ce); + intel_context_fini(ce); + + if (intel_engine_is_virtual(ce->engine)) { + struct guc_virtual_engine *ve = + container_of(ce, typeof(*ve), context); + + kfree(ve); + } else { + intel_context_free(ce); + } +} + static void guc_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); @@ -1068,11 +1107,11 @@ static void guc_context_destroy(struct kref *kref) * registered with the GuC. 
*/ if (context_guc_id_invalid(ce)) { - lrc_destroy(kref); + __guc_context_destroy(ce); return; } else if (!lrc_desc_registered(guc, ce->guc_id)) { release_guc_id(guc, ce); - lrc_destroy(kref); + __guc_context_destroy(ce); return; } @@ -1087,7 +1126,7 @@ static void guc_context_destroy(struct kref *kref) spin_lock_irqsave(&guc->contexts_lock, flags); if (context_guc_id_invalid(ce)) { spin_unlock_irqrestore(&guc->contexts_lock, flags); - lrc_destroy(kref); + __guc_context_destroy(ce); return; } @@ -1132,6 +1171,8 @@ static const struct intel_context_ops guc_context_ops = { .reset = lrc_reset, .destroy = guc_context_destroy, + + .create_virtual = guc_create_virtual, }; static void __guc_signal_context_fence(struct intel_context *ce) @@ -1260,6 +1301,83 @@ out: return 0; } +static struct intel_engine_cs * +guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + unsigned int num_siblings = 0; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (num_siblings++ == sibling) + return engine; + + return NULL; +} + +static int guc_virtual_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return __guc_context_pre_pin(ce, engine, ww, vaddr); +} + +static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return __guc_context_pin(ce, engine, vaddr); +} + +static void guc_virtual_context_enter(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; + + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_get(engine); + + intel_timeline_enter(ce->timeline); +} + +static void guc_virtual_context_exit(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; + + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_put(engine); + + intel_timeline_exit(ce->timeline); +} + +static int guc_virtual_context_alloc(struct intel_context *ce) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return lrc_alloc(ce, engine); +} + +static const struct intel_context_ops virtual_guc_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_virtual_context_pre_pin, + .pin = guc_virtual_context_pin, + .unpin = guc_context_unpin, + .post_unpin = guc_context_post_unpin, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .sched_disable = guc_context_sched_disable, + + .destroy = guc_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + static void sanitize_hwsp(struct intel_engine_cs *engine) { struct intel_timeline *tl; @@ -1566,7 +1684,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, } else if (context_destroyed(ce)) { /* Context has been destroyed */ release_guc_id(guc, ce); - lrc_destroy(&ce->ref); + __guc_context_destroy(ce); } decr_outstanding_submission_g2h(guc); @@ -1681,3 +1799,107 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, atomic_read(&ce->guc_sched_state_no_lock)); } } + +static struct intel_context * +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +{ + struct guc_virtual_engine *ve; + struct intel_guc *guc; + unsigned int n; + int err; + + ve = kzalloc(sizeof(*ve), 
GFP_KERNEL); + if (!ve) + return ERR_PTR(-ENOMEM); + + guc = &siblings[0]->gt->uc.guc; + + ve->base.i915 = siblings[0]->i915; + ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; + ve->base.id = -1; + + ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; + ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.saturated = ALL_ENGINES; + ve->base.breadcrumbs = intel_breadcrumbs_create(&ve->base); + if (!ve->base.breadcrumbs) { + kfree(ve); + return ERR_PTR(-ENOMEM); + } + + snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); + + ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); + + ve->base.cops = &virtual_guc_context_ops; + ve->base.request_alloc = guc_request_alloc; + + ve->base.submit_request = guc_submit_request; + + ve->base.flags = I915_ENGINE_IS_VIRTUAL; + + intel_context_init(&ve->context, &ve->base); + + for (n = 0; n < count; n++) { + struct intel_engine_cs *sibling = siblings[n]; + + GEM_BUG_ON(!is_power_of_2(sibling->mask)); + if (sibling->mask & ve->base.mask) { + DRM_DEBUG("duplicate %s entry in load balancer\n", + sibling->name); + err = -EINVAL; + goto err_put; + } + + ve->base.mask |= sibling->mask; + + if (n != 0 && ve->base.class != sibling->class) { + DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); + err = -EINVAL; + goto err_put; + } else if (n == 0) { + ve->base.class = sibling->class; + ve->base.uabi_class = sibling->uabi_class; + snprintf(ve->base.name, sizeof(ve->base.name), + "v%dx%d", ve->base.class, count); + ve->base.context_size = sibling->context_size; + + ve->base.emit_bb_start = sibling->emit_bb_start; + ve->base.emit_flush = sibling->emit_flush; + ve->base.emit_init_breadcrumb = + sibling->emit_init_breadcrumb; + ve->base.emit_fini_breadcrumb = + sibling->emit_fini_breadcrumb; + ve->base.emit_fini_breadcrumb_dw = + sibling->emit_fini_breadcrumb_dw; + + ve->base.flags |= sibling->flags; + + ve->base.props.timeslice_duration_ms = + sibling->props.timeslice_duration_ms; + ve->base.props.preempt_timeout_ms = + sibling->props.preempt_timeout_ms; + } + } + + return &ve->context; + +err_put: + intel_context_put(&ve->context); + return ERR_PTR(err); +} + +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (READ_ONCE(engine->props.heartbeat_interval_ms)) + return true; + + return false; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 2b9470c90558..5f263ac4f46a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -26,6 +26,8 @@ void intel_guc_submission_print_info(struct intel_guc *guc, void intel_guc_submission_print_context_info(struct intel_guc *guc, struct drm_printer *p); +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); + static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { /* XXX: GuC submission is unavailable for now */ -- cgit v1.2.3 From 96d3e0e1ad0af3070f0a01c72b0ecfa8c2e581b7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:17 -0700 Subject: drm/i915/guc: Make hangcheck work with GuC virtual engines The serial number tracking of engines happens at the backend of request submission and was expecting to 
only be given physical engines. However, in GuC submission mode, the decomposition of virtual to physical engines does not happen in i915. Instead, requests are submitted to their virtual engine mask all the way through to the hardware (i.e. to GuC). This means the heartbeat code thinks the physical engines are idle due to the serial number not incrementing, which in turn means hangcheck does not work for GuC virtual engines. This patch updates the tracking to decompose virtual engines into their physical constituents and track the request against each. This is not entirely accurate, as the GuC will only be issuing the request to one physical engine. However, it is the best that i915 can do given that it has no knowledge of the GuC's scheduling decisions. The downside is that all physical engines constituting a GuC virtual engine will be periodically unparked (even while only a single context is executing) in order to be pinged with a heartbeat request. However, the power and performance cost of this is not expected to be measurable (due to the low frequency of heartbeat pulses) and it is considered an easier option than trying to make changes to the GuC firmware. v2: (Tvrtko) - Update commit message - Have default behavior if no vfunc present Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-3-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_request.c | 6 +++++- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 3f308a920b50..75a34cd3f1c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -382,6 +382,8 @@ struct intel_engine_cs { void (*park)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine); + void (*bump_serial)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); const struct intel_context_ops *cops; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 8b3ae5f65cd5..6b08221df143 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1492,6 +1492,15 @@ static void guc_release(struct intel_engine_cs *engine) lrc_fini_wa_ctx(engine); } +static void virtual_guc_bump_serial(struct intel_engine_cs *engine) +{ + struct intel_engine_cs *e; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(e, engine->gt, mask, tmp) + e->serial++; +} + static void guc_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overridden by each engine.
*/ @@ -1835,6 +1844,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) ve->base.cops = &virtual_guc_context_ops; ve->base.request_alloc = guc_request_alloc; + ve->base.bump_serial = virtual_guc_bump_serial; ve->base.submit_request = guc_submit_request; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 6594cb2f8ebd..39a21d96577e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -669,7 +669,11 @@ bool __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); trace_i915_request_execute(request); - engine->serial++; + if (engine->bump_serial) + engine->bump_serial(engine); + else + engine->serial++; + result = true; GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); -- cgit v1.2.3 From 1e98d8c52ed5dfbaf273c4423c636525c2ce59e7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:18 -0700 Subject: drm/i915: Hold reference to intel_context over life of i915_request Hold a reference to the intel_context over life of an i915_request. Without this an i915_request can exist after the context has been destroyed (e.g. request retired, context closed, but user space holds a reference to the request from an out fence). In the case of GuC submission + virtual engine, the engine that the request references is also destroyed which can trigger bad pointer dref in fence ops (e.g. i915_fence_get_driver_name). We could likely change i915_fence_get_driver_name to avoid touching the engine but let's just be safe and hold the intel_context reference. v2: (John Harrison) - Update comment explaining how GuC mode and execlists mode deal with virtual engines differently Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/i915_request.c | 55 ++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 39a21d96577e..57c9187aff74 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -125,39 +125,17 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->semaphore); /* - * Keep one request on each engine for reserved use under mempressure - * - * We do not hold a reference to the engine here and so have to be - * very careful in what rq->engine we poke. The virtual engine is - * referenced via the rq->context and we released that ref during - * i915_request_retire(), ergo we must not dereference a virtual - * engine here. Not that we would want to, as the only consumer of - * the reserved engine->request_pool is the power management parking, - * which must-not-fail, and that is only run on the physical engines. - * - * Since the request must have been executed to be have completed, - * we know that it will have been processed by the HW and will - * not be unsubmitted again, so rq->engine and rq->execution_mask - * at this point is stable. rq->execution_mask will be a single - * bit if the last and _only_ engine it could execution on was a - * physical engine, if it's multiple bits then it started on and - * could still be on a virtual engine. 
Thus if the mask is not a - * power-of-two we assume that rq->engine may still be a virtual - * engine and so a dangling invalid pointer that we cannot dereference - * - * For example, consider the flow of a bonded request through a virtual - * engine. The request is created with a wide engine mask (all engines - * that we might execute on). On processing the bond, the request mask - * is reduced to one or more engines. If the request is subsequently - * bound to a single engine, it will then be constrained to only - * execute on that engine and never returned to the virtual engine - * after timeslicing away, see __unwind_incomplete_requests(). Thus we - * know that if the rq->execution_mask is a single bit, rq->engine - * can be a physical engine with the exact corresponding mask. + * Keep one request on each engine for reserved use under mempressure, + * do not use with virtual engines as this really is only needed for + * kernel contexts. */ - if (is_power_of_2(rq->execution_mask) && - !cmpxchg(&rq->engine->request_pool, NULL, rq)) + if (!intel_engine_is_virtual(rq->engine) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) { + intel_context_put(rq->context); return; + } + + intel_context_put(rq->context); kmem_cache_free(global.slab_requests, rq); } @@ -956,7 +934,19 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - rq->context = ce; + /* + * Hold a reference to the intel_context over life of an i915_request. + * Without this an i915_request can exist after the context has been + * destroyed (e.g. request retired, context closed, but user space holds + * a reference to the request from an out fence). In the case of GuC + * submission + virtual engine, the engine that the request references + * is also destroyed which can trigger bad pointer dref in fence ops + * (e.g. i915_fence_get_driver_name). We could likely change these + * functions to avoid touching the engine but let's just be safe and + * hold the intel_context reference. In execlist mode the request always + * eventually points to a physical engine so this isn't an issue. + */ + rq->context = intel_context_get(ce); rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; @@ -1033,6 +1023,7 @@ err_unwind: GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); err_free: + intel_context_put(ce); kmem_cache_free(global.slab_requests, rq); err_unreserve: intel_context_unpin(ce); -- cgit v1.2.3 From b02d86b915709155b3c61f97be44251bfdcd4834 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:19 -0700 Subject: drm/i915/guc: Disable bonding extension with GuC submission Update the bonding extension to return -ENODEV when using GuC submission as this extension fundamentally will not work with the GuC submission interface. 
Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-5-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index bc52eeed782a..e3df01a201d7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -491,6 +491,11 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) return -EINVAL; } + if (intel_engine_uses_guc(master)) { + DRM_DEBUG("bonding extension not supported with GuC submission"); + return -ENODEV; + } + if (get_user(num_bonds, &ext->num_bonds)) return -EFAULT; -- cgit v1.2.3 From a95d116098e4ce1c18cfa7f949df3eeeebe6d812 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:20 -0700 Subject: drm/i915/guc: Direct all breadcrumbs for a class to single breadcrumbs With GuC virtual engines the physical engine which a request executes and completes on isn't known to the i915. Therefore we can't attach a request to a physical engines breadcrumbs. To work around this we create a single breadcrumbs per engine class when using GuC submission and direct all physical engine interrupts to this breadcrumbs. v2: (John H) - Rework header file structure so intel_engine_mask_t can be in intel_engine_types.h Signed-off-by: Matthew Brost CC: John Harrison Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 41 ++++++------- drivers/gpu/drm/i915/gt/intel_breadcrumbs.h | 16 +++++- drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 7 +++ drivers/gpu/drm/i915/gt/intel_engine.h | 3 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 28 ++++++++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +- .../gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/mock_engine.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 67 ++++++++++++++++++++-- 9 files changed, 133 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 38cc42783dfb..2007dc6f6b99 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -15,28 +15,14 @@ #include "intel_gt_pm.h" #include "intel_gt_requests.h" -static bool irq_enable(struct intel_engine_cs *engine) +static bool irq_enable(struct intel_breadcrumbs *b) { - if (!engine->irq_enable) - return false; - - /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->gt->irq_lock); - - return true; + return intel_engine_irq_enable(b->irq_engine); } -static void irq_disable(struct intel_engine_cs *engine) +static void irq_disable(struct intel_breadcrumbs *b) { - if (!engine->irq_disable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->gt->irq_lock); + intel_engine_irq_disable(b->irq_engine); } static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) @@ -57,7 +43,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) WRITE_ONCE(b->irq_armed, true); /* Requests may have completed before 
we could enable the interrupt. */ - if (!b->irq_enabled++ && irq_enable(b->irq_engine)) + if (!b->irq_enabled++ && b->irq_enable(b)) irq_work_queue(&b->irq_work); } @@ -76,7 +62,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(b->irq_engine); + b->irq_disable(b); WRITE_ONCE(b->irq_armed, false); intel_gt_pm_put_async(b->irq_engine->gt); @@ -281,7 +267,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) if (!b) return NULL; - b->irq_engine = irq_engine; + kref_init(&b->ref); spin_lock_init(&b->signalers_lock); INIT_LIST_HEAD(&b->signalers); @@ -290,6 +276,10 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) spin_lock_init(&b->irq_lock); init_irq_work(&b->irq_work, signal_irq_work); + b->irq_engine = irq_engine; + b->irq_enable = irq_enable; + b->irq_disable = irq_disable; + return b; } @@ -303,9 +293,9 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_enabled) - irq_enable(b->irq_engine); + b->irq_enable(b); else - irq_disable(b->irq_engine); + b->irq_disable(b); spin_unlock_irqrestore(&b->irq_lock, flags); } @@ -325,11 +315,14 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) } } -void intel_breadcrumbs_free(struct intel_breadcrumbs *b) +void intel_breadcrumbs_free(struct kref *kref) { + struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref); + irq_work_sync(&b->irq_work); GEM_BUG_ON(!list_empty(&b->signalers)); GEM_BUG_ON(b->irq_armed); + kfree(b); } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h index 3ce5ce270b04..be0d4f379a85 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -9,7 +9,7 @@ #include #include -#include "intel_engine_types.h" +#include "intel_breadcrumbs_types.h" struct drm_printer; struct i915_request; @@ -17,7 +17,7 @@ struct intel_breadcrumbs; struct intel_breadcrumbs * intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); -void intel_breadcrumbs_free(struct intel_breadcrumbs *b); +void intel_breadcrumbs_free(struct kref *kref); void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); void __intel_breadcrumbs_park(struct intel_breadcrumbs *b); @@ -48,4 +48,16 @@ void i915_request_cancel_breadcrumb(struct i915_request *request); void intel_context_remove_breadcrumbs(struct intel_context *ce, struct intel_breadcrumbs *b); +static inline struct intel_breadcrumbs * +intel_breadcrumbs_get(struct intel_breadcrumbs *b) +{ + kref_get(&b->ref); + return b; +} + +static inline void intel_breadcrumbs_put(struct intel_breadcrumbs *b) +{ + kref_put(&b->ref, intel_breadcrumbs_free); +} + #endif /* __INTEL_BREADCRUMBS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 3a084ce8ff5e..72dfd3748c4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -7,10 +7,13 @@ #define __INTEL_BREADCRUMBS_TYPES__ #include +#include #include #include #include +#include "intel_engine_types.h" + /* * Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the @@ -29,6 +32,7 @@ * the overhead of waking that client is much preferred. 
*/ struct intel_breadcrumbs { + struct kref ref; atomic_t active; spinlock_t signalers_lock; /* protects the list of signalers */ @@ -42,7 +46,10 @@ struct intel_breadcrumbs { bool irq_armed; /* Not all breadcrumbs are attached to physical HW */ + intel_engine_mask_t engine_mask; struct intel_engine_cs *irq_engine; + bool (*irq_enable)(struct intel_breadcrumbs *b); + void (*irq_disable)(struct intel_breadcrumbs *b); }; #endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 13bfb7ec33b2..8fc76dc8bf98 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -212,6 +212,9 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); +bool intel_engine_irq_enable(struct intel_engine_cs *engine); +void intel_engine_irq_disable(struct intel_engine_cs *engine); + static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 6a7edc839a2c..1eaa658507e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -798,7 +798,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) err_cmd_parser: i915_sched_engine_put(engine->sched_engine); err_sched_engine: - intel_breadcrumbs_free(engine->breadcrumbs); + intel_breadcrumbs_put(engine->breadcrumbs); err_status: cleanup_status_page(engine); return err; @@ -1007,7 +1007,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) GEM_BUG_ON(!list_empty(&engine->sched_engine->requests)); i915_sched_engine_put(engine->sched_engine); - intel_breadcrumbs_free(engine->breadcrumbs); + intel_breadcrumbs_put(engine->breadcrumbs); intel_engine_fini_retire(engine); intel_engine_cleanup_cmd_parser(engine); @@ -1324,6 +1324,30 @@ bool intel_engines_are_idle(struct intel_gt *gt) return true; } +bool intel_engine_irq_enable(struct intel_engine_cs *engine) +{ + if (!engine->irq_enable) + return false; + + /* Caller disables interrupts */ + spin_lock(&engine->gt->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->gt->irq_lock); + + return true; +} + +void intel_engine_irq_disable(struct intel_engine_cs *engine) +{ + if (!engine->irq_disable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->gt->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->gt->irq_lock); +} + void intel_engines_reset_default_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 75a34cd3f1c2..e55776e27a2a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -21,7 +21,6 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "intel_breadcrumbs_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_uncore.h" @@ -52,6 +51,7 @@ struct i915_sched_engine; struct intel_gt; struct intel_ring; struct intel_uncore; +struct intel_breadcrumbs; typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 1e76768dc5e9..6e4959ee5065 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3457,7 +3457,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) intel_context_fini(&ve->context); if (ve->base.breadcrumbs) - intel_breadcrumbs_free(ve->base.breadcrumbs); + intel_breadcrumbs_put(ve->base.breadcrumbs); if (ve->base.sched_engine) i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 68970398e4ef..2ddaacb4e753 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -284,7 +284,7 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); i915_sched_engine_put(engine->sched_engine); - intel_breadcrumbs_free(engine->breadcrumbs); + intel_breadcrumbs_put(engine->breadcrumbs); intel_context_unpin(engine->kernel_context); intel_context_put(engine->kernel_context); @@ -370,7 +370,7 @@ int mock_engine_init(struct intel_engine_cs *engine) return 0; err_breadcrumbs: - intel_breadcrumbs_free(engine->breadcrumbs); + intel_breadcrumbs_put(engine->breadcrumbs); err_schedule: i915_sched_engine_put(engine->sched_engine); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 6b08221df143..60a73d9a00f6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1087,6 +1087,9 @@ static void __guc_context_destroy(struct intel_context *ce) struct guc_virtual_engine *ve = container_of(ce, typeof(*ve), context); + if (ve->base.breadcrumbs) + intel_breadcrumbs_put(ve->base.breadcrumbs); + kfree(ve); } else { intel_context_free(ce); @@ -1378,6 +1381,62 @@ static const struct intel_context_ops virtual_guc_context_ops = { .get_sibling = guc_virtual_get_sibling, }; +static bool +guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *sibling; + intel_engine_mask_t tmp, mask = b->engine_mask; + bool result = false; + + for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) + result |= intel_engine_irq_enable(sibling); + + return result; +} + +static void +guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *sibling; + intel_engine_mask_t tmp, mask = b->engine_mask; + + for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) + intel_engine_irq_disable(sibling); +} + +static void guc_init_breadcrumbs(struct intel_engine_cs *engine) +{ + int i; + + /* + * In GuC submission mode we do not know which physical engine a request + * will be scheduled on, this creates a problem because the breadcrumb + * interrupt is per physical engine. To work around this we attach + * requests and direct all breadcrumb interrupts to the first instance + * of an engine per class. In addition all breadcrumb interrupts are + * enabled / disabled across an engine class in unison. 
+ */ + for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { + struct intel_engine_cs *sibling = + engine->gt->engine_class[engine->class][i]; + + if (sibling) { + if (engine->breadcrumbs != sibling->breadcrumbs) { + intel_breadcrumbs_put(engine->breadcrumbs); + engine->breadcrumbs = + intel_breadcrumbs_get(sibling->breadcrumbs); + } + break; + } + } + + if (engine->breadcrumbs) { + engine->breadcrumbs->engine_mask |= engine->mask; + engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; + engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; + } +} + static void sanitize_hwsp(struct intel_engine_cs *engine) { struct intel_timeline *tl; @@ -1590,6 +1649,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) guc_default_vfuncs(engine); guc_default_irqs(engine); + guc_init_breadcrumbs(engine); if (engine->class == RENDER_CLASS) rcs_submission_override(engine); @@ -1832,11 +1892,6 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; ve->base.saturated = ALL_ENGINES; - ve->base.breadcrumbs = intel_breadcrumbs_create(&ve->base); - if (!ve->base.breadcrumbs) { - kfree(ve); - return ERR_PTR(-ENOMEM); - } snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); @@ -1885,6 +1940,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) sibling->emit_fini_breadcrumb; ve->base.emit_fini_breadcrumb_dw = sibling->emit_fini_breadcrumb_dw; + ve->base.breadcrumbs = + intel_breadcrumbs_get(sibling->breadcrumbs); ve->base.flags |= sibling->flags; -- cgit v1.2.3 From 27466222ab8ab60f844869508954222bf05e5a67 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:21 -0700 Subject: drm/i915: Add i915_sched_engine destroy vfunc This is required to allow backend specific cleanup v2: (John H) - Rework commit message Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-7-matthew.brost@intel.com --- drivers/gpu/drm/i915/i915_scheduler.c | 3 ++- drivers/gpu/drm/i915/i915_scheduler.h | 4 +--- drivers/gpu/drm/i915/i915_scheduler_types.h | 5 +++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 561c649e59f7..2804fd5b660b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -431,7 +431,7 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } -void i915_sched_engine_free(struct kref *kref) +static void default_destroy(struct kref *kref) { struct i915_sched_engine *sched_engine = container_of(kref, typeof(*sched_engine), ref); @@ -453,6 +453,7 @@ i915_sched_engine_create(unsigned int subclass) sched_engine->queue = RB_ROOT_CACHED; sched_engine->queue_priority_hint = INT_MIN; + sched_engine->destroy = default_destroy; INIT_LIST_HEAD(&sched_engine->requests); INIT_LIST_HEAD(&sched_engine->hold); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 650ab8e0db9f..3c9504e9f409 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -51,8 +51,6 @@ static inline void i915_priolist_free(struct i915_priolist *p) struct i915_sched_engine * i915_sched_engine_create(unsigned int subclass); -void i915_sched_engine_free(struct kref *kref); - static inline 
struct i915_sched_engine * i915_sched_engine_get(struct i915_sched_engine *sched_engine) { @@ -63,7 +61,7 @@ i915_sched_engine_get(struct i915_sched_engine *sched_engine) static inline void i915_sched_engine_put(struct i915_sched_engine *sched_engine) { - kref_put(&sched_engine->ref, i915_sched_engine_free); + kref_put(&sched_engine->ref, sched_engine->destroy); } static inline bool diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 5935c3152bdc..00384e2c5273 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -163,6 +163,11 @@ struct i915_sched_engine { */ void *private_data; + /** + * @destroy: destroy schedule engine / cleanup in backend + */ + void (*destroy)(struct kref *kref); + /** * @kick_backend: kick backend after a request's priority has changed */ -- cgit v1.2.3 From d1cee2d37a62888b0fa4babe19bbb06bca976c9f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:22 -0700 Subject: drm/i915: Move active request tracking to a vfunc Move active request tracking to a backend vfunc rather than assuming all backends want to do this in the same manner. In the case of execlists / ring submission the tracking is on the physical engine, while with GuC submission it is on the context. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-8-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 3 ++ drivers/gpu/drm/i915/gt/intel_context_types.h | 7 ++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 6 ++++ .../gpu/drm/i915/gt/intel_execlists_submission.c | 40 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_ring_submission.c | 22 ++++++++++++ drivers/gpu/drm/i915/gt/mock_engine.c | 30 ++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 33 +++++++++++++++++ drivers/gpu/drm/i915/i915_request.c | 41 +++------------------- drivers/gpu/drm/i915/i915_request.h | 2 ++ 9 files changed, 147 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index baa05fddd690..0bf4a13e9759 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -393,6 +393,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_state.lock); INIT_LIST_HEAD(&ce->guc_state.fences); + spin_lock_init(&ce->guc_active.lock); + INIT_LIST_HEAD(&ce->guc_active.requests); + ce->guc_id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id_link); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 542c98418771..035108c10b2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -162,6 +162,13 @@ struct intel_context { struct list_head fences; } guc_state; + struct { + /** lock: protects everything in guc_active */ + spinlock_t lock; + /** requests: active requests on this context */ + struct list_head requests; + } guc_active; + /* GuC scheduling state flags that do not require a lock.
*/ atomic_t guc_sched_state_no_lock; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e55776e27a2a..1c7e2724cdae 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -414,6 +414,12 @@ struct intel_engine_cs { void (*release)(struct intel_engine_cs *engine); + /* + * Add / remove request from engine active tracking + */ + void (*add_active_request)(struct i915_request *rq); + void (*remove_active_request)(struct i915_request *rq); + struct intel_engine_execlists execlists; /* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 6e4959ee5065..b4a876736074 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3157,6 +3157,42 @@ static void execlists_park(struct intel_engine_cs *engine) cancel_timer(&engine->execlists.preempt); } +static void add_to_engine(struct i915_request *rq) +{ + lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->sched_engine->lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); + locked = engine; + } + list_del_init(&rq->sched.link); + + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&locked->sched_engine->lock); + + i915_request_notify_execute_cb_imm(rq); +} + static bool can_preempt(struct intel_engine_cs *engine) { if (GRAPHICS_VER(engine->i915) > 8) @@ -3251,6 +3287,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; + engine->add_active_request = add_to_engine; + engine->remove_active_request = remove_from_engine; engine->reset.prepare = execlists_reset_prepare; engine->reset.rewind = execlists_reset_rewind; @@ -3847,6 +3885,8 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) "v%dx%d", ve->base.class, count); ve->base.context_size = sibling->context_size; + ve->base.add_active_request = sibling->add_active_request; + ve->base.remove_active_request = sibling->remove_active_request; ve->base.emit_bb_start = sibling->emit_bb_start; ve->base.emit_flush = sibling->emit_flush; ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 5c4d204d07cc..a5404c7b600f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1047,6 +1047,25 @@ static void setup_irq(struct intel_engine_cs *engine) } } +static void add_to_engine(struct i915_request *rq) +{ + 
lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void remove_from_engine(struct i915_request *rq) +{ + spin_lock_irq(&rq->engine->sched_engine->lock); + list_del_init(&rq->sched.link); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&rq->engine->sched_engine->lock); + + i915_request_notify_execute_cb_imm(rq); +} + static void setup_common(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1064,6 +1083,9 @@ static void setup_common(struct intel_engine_cs *engine) engine->reset.cancel = reset_cancel; engine->reset.finish = reset_finish; + engine->add_active_request = add_to_engine; + engine->remove_active_request = remove_from_engine; + engine->cops = &ring_context_ops; engine->request_alloc = ring_request_alloc; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2ddaacb4e753..2c1af030310c 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -235,6 +235,34 @@ static void mock_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->hw_lock, flags); } +static void mock_add_to_engine(struct i915_request *rq) +{ + lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void mock_remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. 
+ */ + + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->sched_engine->lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); + locked = engine; + } + list_del_init(&rq->sched.link); + spin_unlock_irq(&locked->sched_engine->lock); +} + static void mock_reset_prepare(struct intel_engine_cs *engine) { } @@ -321,6 +349,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_flush = mock_emit_flush; engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; + engine->base.add_active_request = mock_add_to_engine; + engine->base.remove_active_request = mock_remove_from_engine; engine->base.reset.prepare = mock_reset_prepare; engine->base.reset.rewind = mock_reset_rewind; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 60a73d9a00f6..2d49fbcfb88b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1159,6 +1159,33 @@ static int guc_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void add_to_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock(&ce->guc_active.lock); + list_move_tail(&rq->sched.link, &ce->guc_active.requests); + spin_unlock(&ce->guc_active.lock); +} + +static void remove_from_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock_irq(&ce->guc_active.lock); + + list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&ce->guc_active.lock); + + atomic_dec(&ce->guc_id_ref); + i915_request_notify_execute_cb_imm(rq); +} + static const struct intel_context_ops guc_context_ops = { .alloc = guc_context_alloc, @@ -1568,6 +1595,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &guc_context_ops; engine->request_alloc = guc_request_alloc; + engine->add_active_request = add_to_context; + engine->remove_active_request = remove_from_context; engine->sched_engine->schedule = i915_schedule; @@ -1932,6 +1961,10 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) "v%dx%d", ve->base.class, count); ve->base.context_size = sibling->context_size; + ve->base.add_active_request = + sibling->add_active_request; + ve->base.remove_active_request = + sibling->remove_active_request; ve->base.emit_bb_start = sibling->emit_bb_start; ve->base.emit_flush = sibling->emit_flush; ve->base.emit_init_breadcrumb = diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 57c9187aff74..aeef45679897 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -182,7 +182,7 @@ static bool irq_work_imm(struct irq_work *wrk) return false; } -static void __notify_execute_cb_imm(struct i915_request *rq) +void i915_request_notify_execute_cb_imm(struct i915_request *rq) { __notify_execute_cb(rq, irq_work_imm); } @@ -256,37 +256,6 @@ i915_request_active_engine(struct i915_request *rq, return ret; } - -static void remove_from_engine(struct i915_request *rq) -{ - struct intel_engine_cs *engine, *locked; - - /* - * Virtual engines complicate acquiring the engine timeline lock, - * as their 
rq->engine pointer is not stable until under that - * engine lock. The simple ploy we use is to take the lock then - * check that the rq still belongs to the newly locked engine. - */ - locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->sched_engine->lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->sched_engine->lock); - spin_lock(&engine->sched_engine->lock); - locked = engine; - } - list_del_init(&rq->sched.link); - - clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); - - /* Prevent further __await_execution() registering a cb, then flush */ - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - - spin_unlock_irq(&locked->sched_engine->lock); - - __notify_execute_cb_imm(rq); -} - static void __rq_init_watchdog(struct i915_request *rq) { rq->watchdog.timer.function = NULL; @@ -383,9 +352,7 @@ bool i915_request_retire(struct i915_request *rq) * after removing the breadcrumb and signaling it, so that we do not * inadvertently attach the breadcrumb to a completed request. */ - if (!list_empty(&rq->sched.link)) - remove_from_engine(rq); - atomic_dec(&rq->context->guc_id_ref); + rq->engine->remove_active_request(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ @@ -516,7 +483,7 @@ __await_execution(struct i915_request *rq, if (llist_add(&cb->work.node.llist, &signal->execute_cb)) { if (i915_request_is_active(signal) || __request_in_flight(signal)) - __notify_execute_cb_imm(signal); + i915_request_notify_execute_cb_imm(signal); } return 0; @@ -655,7 +622,7 @@ bool __i915_request_submit(struct i915_request *request) result = true; GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - list_move_tail(&request->sched.link, &engine->sched_engine->requests); + engine->add_active_request(request); active: clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 717e5b292046..128030f43bbf 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -647,4 +647,6 @@ bool i915_request_active_engine(struct i915_request *rq, struct intel_engine_cs **active); +void i915_request_notify_execute_cb_imm(struct i915_request *rq); + #endif /* I915_REQUEST_H */ -- cgit v1.2.3 From eb5e7da736f36f558b9f8992f5e691a5b3b46b84 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:23 -0700 Subject: drm/i915/guc: Reset implementation for new GuC interface Reset implementation for new GuC interface. This is the legacy reset implementation which is called when the i915 owns the engine hang check. Future patches will offload the engine hang check to GuC but we will continue to maintain this legacy path as a fallback and this code path is also required if the GuC dies. With the new GuC interface it is not possible to reset individual engines - it is only possible to reset the GPU entirely. This patch forces an entire chip reset if any engine hangs. 
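For illustration, a minimal stand-alone model of the escalation path this change creates (the struct and function names below are made up for the sketch, not the driver's API): once GuC owns submission, a per-engine reset attempt is refused and the error handler falls through to a full-chip reset.

#include <stdbool.h>
#include <stdio.h>

struct gt_model {
        bool uses_guc_submission;       /* GuC owns request submission */
        bool has_engine_reset;          /* hardware supports per-engine reset */
};

/* Models the new behaviour of refusing engine reset under GuC submission. */
static int try_engine_reset(const struct gt_model *gt)
{
        if (gt->uses_guc_submission)
                return -1;              /* not possible, caller must escalate */
        return gt->has_engine_reset ? 0 : -1;
}

static void handle_hang(const struct gt_model *gt)
{
        if (!gt->uses_guc_submission && try_engine_reset(gt) == 0) {
                printf("engine reset succeeded\n");
                return;
        }
        printf("escalating to full GT reset\n");
}

int main(void)
{
        struct gt_model execlists = { .uses_guc_submission = false, .has_engine_reset = true };
        struct gt_model guc       = { .uses_guc_submission = true,  .has_engine_reset = true };

        handle_hang(&execlists);        /* engine reset succeeded */
        handle_hang(&guc);              /* escalating to full GT reset */
        return 0;
}

The same shape shows up in the diff below: the engine-reset path bails out early when the engine uses GuC submission, and the error handler only tries per-engine resets when GuC submission is not in use.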
v2: (Michal) - Check for -EPIPE rather than -EIO (CT deadlock/corrupt check) v3: (John H) - Split into a series of smaller patches v4: (John H) - Fix typo - Add braces around if statements in reset code v5: (Checkpatch) - Fix warnings Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-9-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 6 +- drivers/gpu/drm/i915/gt/intel_reset.c | 18 +- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 13 - drivers/gpu/drm/i915/gt/uc/intel_guc.h | 8 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 562 ++++++++++++++++++---- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 38 +- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 3 + 7 files changed, 516 insertions(+), 132 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d86825437516..cd7b96005d29 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -170,8 +170,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - intel_uc_sanitize(>->uc); - for_each_engine(engine, gt, id) if (engine->reset.prepare) engine->reset.prepare(engine); @@ -187,6 +185,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) __intel_engine_reset(engine, false); } + intel_uc_reset(>->uc, false); + for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); @@ -239,6 +239,8 @@ int intel_gt_resume(struct intel_gt *gt) goto err_wedged; } + intel_uc_reset_finish(>->uc); + intel_rps_enable(>->rps); intel_llc_enable(>->llc); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 9586613ee399..721a10e2215e 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -832,6 +832,8 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) __intel_engine_reset(engine, stalled_mask & engine->mask); local_bh_enable(); + intel_uc_reset(>->uc, true); + intel_ggtt_restore_fences(gt->ggtt); return err; @@ -856,6 +858,8 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) if (awake & engine->mask) intel_engine_pm_put(engine); } + + intel_uc_reset_finish(>->uc); } static void nop_submit_request(struct i915_request *request) @@ -909,6 +913,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) for_each_engine(engine, gt, id) if (engine->reset.cancel) engine->reset.cancel(engine); + intel_uc_cancel_requests(>->uc); local_bh_enable(); reset_finish(gt, awake); @@ -1197,6 +1202,9 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); + if (intel_engine_uses_guc(engine)) + return -ENODEV; + if (!intel_engine_pm_get_if_awake(engine)) return 0; @@ -1207,13 +1215,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (intel_engine_uses_guc(engine)) - ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); - else - ret = intel_gt_reset_engine(engine); + ret = intel_gt_reset_engine(engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - ENGINE_TRACE(engine, 
"Failed to reset, err: %d\n", ret); + ENGINE_TRACE(engine, "Failed to reset %s, err: %d\n", engine->name, ret); goto out; } @@ -1347,7 +1352,8 @@ void intel_gt_handle_error(struct intel_gt *gt, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + if (!intel_uc_uses_guc_submission(>->uc) && + intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 6661dcb02239..9b09395b998f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -572,19 +572,6 @@ int intel_guc_suspend(struct intel_guc *guc) return 0; } -/** - * intel_guc_reset_engine() - ask GuC to reset an engine - * @guc: intel_guc structure - * @engine: engine to be reset - */ -int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine) -{ - /* XXX: to be implemented with submission interface rework */ - - return -ENODEV; -} - /** * intel_guc_resume() - notify GuC resuming from suspend state * @guc: the guc diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index d4987cd789ea..a068e3262676 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -249,14 +249,16 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); -int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine); - int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_sched_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +void intel_guc_submission_reset_prepare(struct intel_guc *guc); +void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); +void intel_guc_submission_reset_finish(struct intel_guc *guc); +void intel_guc_submission_cancel_requests(struct intel_guc *guc); + void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 2d49fbcfb88b..be13a33ef17e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -141,7 +141,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce) static inline void set_context_wait_for_deregister_to_register(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() */ + /* Only should be called from guc_lrc_desc_pin() without lock */ ce->guc_state.sched_state |= SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; } @@ -239,15 +239,31 @@ static int guc_lrc_desc_pool_create(struct intel_guc *guc) static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) { + guc->lrc_desc_pool_vaddr = NULL; i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); } +static inline bool guc_submission_initialized(struct intel_guc *guc) +{ + return !!guc->lrc_desc_pool_vaddr; +} + static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) { - struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); + if (likely(guc_submission_initialized(guc))) { + struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); + unsigned long flags; - memset(desc, 0, 
sizeof(*desc)); - xa_erase_irq(&guc->context_lookup, id); + memset(desc, 0, sizeof(*desc)); + + /* + * xarray API doesn't have xa_erase_irqsave wrapper, so calling + * the lower level functions directly. + */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_erase(&guc->context_lookup, id); + xa_unlock_irqrestore(&guc->context_lookup, flags); + } } static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) @@ -258,7 +274,15 @@ static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, struct intel_context *ce) { - xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); + unsigned long flags; + + /* + * xarray API doesn't have xa_save_irqsave wrapper, so calling the + * lower level functions directly. + */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); + xa_unlock_irqrestore(&guc->context_lookup, flags); } static int guc_submission_send_busy_loop(struct intel_guc *guc, @@ -327,6 +351,8 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } +static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); + static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { int err; @@ -334,11 +360,22 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) u32 action[3]; int len = 0; u32 g2h_len_dw = 0; - bool enabled = context_enabled(ce); + bool enabled; GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); + /* + * Corner case where the GuC firmware was blown away and reloaded while + * this context was pinned. + */ + if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { + err = guc_lrc_desc_pin(ce, false); + if (unlikely(err)) + goto out; + } + enabled = context_enabled(ce); + if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; action[len++] = ce->guc_id; @@ -361,6 +398,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) intel_context_put(ce); } +out: return err; } @@ -415,15 +453,10 @@ done: if (submit) { guc_set_lrc_tail(last); resubmit: - /* - * We only check for -EBUSY here even though it is possible for - * -EDEADLK to be returned. If -EDEADLK is returned, the GuC has - * died and a full GT reset needs to be done. The hangcheck will - * eventually detect that the GuC has died and trigger this - * reset so no need to handle -EDEADLK here. 
- */ ret = guc_add_request(guc, last); - if (ret == -EBUSY) { + if (unlikely(ret == -EPIPE)) + goto deadlk; + else if (ret == -EBUSY) { tasklet_schedule(&sched_engine->tasklet); guc->stalled_request = last; return false; @@ -433,6 +466,11 @@ resubmit: guc->stalled_request = NULL; return submit; + +deadlk: + sched_engine->tasklet.callback = NULL; + tasklet_disable_nosync(&sched_engine->tasklet); + return false; } static void guc_submission_tasklet(struct tasklet_struct *t) @@ -459,27 +497,167 @@ static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) intel_engine_signal_breadcrumbs(engine); } -static void guc_reset_prepare(struct intel_engine_cs *engine) +static void __guc_context_destroy(struct intel_context *ce); +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); +static void guc_signal_context_fence(struct intel_context *ce); + +static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) +{ + struct intel_context *ce; + unsigned long index, flags; + bool pending_disable, pending_enable, deregister, destroyed; + + xa_for_each(&guc->context_lookup, index, ce) { + /* Flush context */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + /* + * Once we are at this point submission_disabled() is guaranteed + * to be visible to all callers who set the below flags (see above + * flush and flushes in reset_prepare). If submission_disabled() + * is set, the caller shouldn't set these flags. + */ + + destroyed = context_destroyed(ce); + pending_enable = context_pending_enable(ce); + pending_disable = context_pending_disable(ce); + deregister = context_wait_for_deregister_to_register(ce); + init_sched_state(ce); + + if (pending_enable || destroyed || deregister) { + atomic_dec(&guc->outstanding_submission_g2h); + if (deregister) + guc_signal_context_fence(ce); + if (destroyed) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + if (pending_enable || deregister) + intel_context_put(ce); + } + + /* Not mutualy exclusive with above if statement. */ + if (pending_disable) { + guc_signal_context_fence(ce); + intel_context_sched_disable_unpin(ce); + atomic_dec(&guc->outstanding_submission_g2h); + intel_context_put(ce); + } + } +} + +static inline bool +submission_disabled(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + + return unlikely(!sched_engine || + !__tasklet_is_enabled(&sched_engine->tasklet)); +} + +static void disable_submission(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + + if (__tasklet_is_enabled(&sched_engine->tasklet)) { + GEM_BUG_ON(!guc->ct.enabled); + __tasklet_disable_sync_once(&sched_engine->tasklet); + sched_engine->tasklet.callback = NULL; + } +} + +static void enable_submission(struct intel_guc *guc) { - ENGINE_TRACE(engine, "\n"); + struct i915_sched_engine * const sched_engine = guc->sched_engine; + unsigned long flags; + + spin_lock_irqsave(&guc->sched_engine->lock, flags); + sched_engine->tasklet.callback = guc_submission_tasklet; + wmb(); /* Make sure callback visible */ + if (!__tasklet_is_enabled(&sched_engine->tasklet) && + __tasklet_enable(&sched_engine->tasklet)) { + GEM_BUG_ON(!guc->ct.enabled); + + /* And kick in case we missed a new request submission. 
*/ + tasklet_hi_schedule(&sched_engine->tasklet); + } + spin_unlock_irqrestore(&guc->sched_engine->lock, flags); +} + +static void guc_flush_submissions(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + unsigned long flags; + + spin_lock_irqsave(&sched_engine->lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +void intel_guc_submission_reset_prepare(struct intel_guc *guc) +{ + int i; + + if (unlikely(!guc_submission_initialized(guc))) { + /* Reset called during driver load? GuC not yet initialised! */ + return; + } + + disable_submission(guc); + guc->interrupts.disable(guc); + + /* Flush IRQ handler */ + spin_lock_irq(&guc_to_gt(guc)->irq_lock); + spin_unlock_irq(&guc_to_gt(guc)->irq_lock); + + guc_flush_submissions(guc); /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. + * Handle any outstanding G2Hs before reset. Call IRQ handler directly + * each pass as interrupt have been disabled. We always scrub for + * outstanding G2H as it is possible for outstanding_submission_g2h to + * be incremented after the context state update. */ - __tasklet_disable_sync_once(&engine->sched_engine->tasklet); + for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { + intel_guc_to_host_event_handler(guc); +#define wait_for_reset(guc, wait_var) \ + guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) + do { + wait_for_reset(guc, &guc->outstanding_submission_g2h); + } while (!list_empty(&guc->ct.requests.incoming)); + } + scrub_guc_desc_for_outstanding_g2h(guc); } -static void guc_reset_state(struct intel_context *ce, - struct intel_engine_cs *engine, - u32 head, - bool scrub) +static struct intel_engine_cs * +guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) { + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + unsigned int num_siblings = 0; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (num_siblings++ == sibling) + return engine; + + return NULL; +} + +static inline struct intel_engine_cs * +__context_to_physical_engine(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + + if (intel_engine_is_virtual(engine)) + engine = guc_virtual_get_sibling(engine, 0); + + return engine; +} + +static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) +{ + struct intel_engine_cs *engine = __context_to_physical_engine(ce); + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* @@ -497,42 +675,148 @@ static void guc_reset_state(struct intel_context *ce, lrc_update_regs(ce, engine, head); } -static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) +static void guc_reset_nop(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq; +} + +static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) +{ +} + +static void +__unwind_incomplete_requests(struct intel_context *ce) +{ + struct i915_request *rq, *rn; + struct list_head *pl; + int prio = I915_PRIORITY_INVALID; + struct i915_sched_engine * const sched_engine = + ce->engine->sched_engine; + unsigned long flags; + + 
spin_lock_irqsave(&sched_engine->lock, flags); + spin_lock(&ce->guc_active.lock); + list_for_each_entry_safe(rq, rn, + &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + continue; + + list_del_init(&rq->sched.link); + spin_unlock(&ce->guc_active.lock); + + __i915_request_unsubmit(rq); + + /* Push the request back into the queue for later resubmission. */ + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(sched_engine, prio); + } + GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); + + list_add_tail(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + spin_lock(&ce->guc_active.lock); + } + spin_unlock(&ce->guc_active.lock); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static struct i915_request *context_find_active_request(struct intel_context *ce) +{ + struct i915_request *rq, *active = NULL; unsigned long flags; - spin_lock_irqsave(&engine->sched_engine->lock, flags); + spin_lock_irqsave(&ce->guc_active.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + + return active; +} + +static void __guc_reset_context(struct intel_context *ce, bool stalled) +{ + struct i915_request *rq; + u32 head; + + /* + * GuC will implicitly mark the context as non-schedulable + * when it sends the reset notification. Make sure our state + * reflects this change. The context will be marked enabled + * on resubmission. + */ + clr_context_enabled(ce); - /* Push back any incomplete requests for replay after the reset. */ - rq = execlists_unwind_incomplete_requests(execlists); - if (!rq) - goto out_unlock; + rq = context_find_active_request(ce); + if (!rq) { + head = ce->ring->tail; + stalled = false; + goto out_replay; + } if (!i915_request_started(rq)) stalled = false; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + head = intel_ring_wrap(ce->ring, rq->head); __i915_request_reset(rq, stalled); - guc_reset_state(rq->context, engine, rq->head, stalled); -out_unlock: - spin_unlock_irqrestore(&engine->sched_engine->lock, flags); +out_replay: + guc_reset_state(ce, head, stalled); + __unwind_incomplete_requests(ce); +} + +void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) +{ + struct intel_context *ce; + unsigned long index; + + if (unlikely(!guc_submission_initialized(guc))) { + /* Reset called during driver load? GuC not yet initialised! */ + return; + } + + xa_for_each(&guc->context_lookup, index, ce) + if (intel_context_is_pinned(ce)) + __guc_reset_context(ce, stalled); + + /* GuC is blown away, drop all references to contexts */ + xa_destroy(&guc->context_lookup); +} + +static void guc_cancel_context_requests(struct intel_context *ce) +{ + struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; + struct i915_request *rq; + unsigned long flags; + + /* Mark all executing requests as skipped. 
*/ + spin_lock_irqsave(&sched_engine->lock, flags); + spin_lock(&ce->guc_active.lock); + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + i915_request_put(i915_request_mark_eio(rq)); + spin_unlock(&ce->guc_active.lock); + spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void guc_reset_cancel(struct intel_engine_cs *engine) +static void +guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) { - struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; /* Can be called during boot if GuC fails to load */ - if (!engine->gt) + if (!sched_engine) return; - ENGINE_TRACE(engine, "\n"); - /* * Before we call engine->cancel_requests(), we should have exclusive * access to the submission state. This is arranged for us by the @@ -549,21 +833,16 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) */ spin_lock_irqsave(&sched_engine->lock, flags); - /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &sched_engine->requests, sched.link) { - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); - } - /* Flush the queued requests to the timeline list (for retiring). */ while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { list_del_init(&rq->sched.link); + __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); + + i915_request_put(i915_request_mark_eio(rq)); } rb_erase_cached(&p->node, &sched_engine->queue); @@ -578,14 +857,39 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void guc_reset_finish(struct intel_engine_cs *engine) +void intel_guc_submission_cancel_requests(struct intel_guc *guc) { - if (__tasklet_enable(&engine->sched_engine->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&engine->sched_engine->tasklet); + struct intel_context *ce; + unsigned long index; + + xa_for_each(&guc->context_lookup, index, ce) + if (intel_context_is_pinned(ce)) + guc_cancel_context_requests(ce); + + guc_cancel_sched_engine_requests(guc->sched_engine); + + /* GuC is blown away, drop all references to contexts */ + xa_destroy(&guc->context_lookup); +} + +void intel_guc_submission_reset_finish(struct intel_guc *guc) +{ + /* Reset called during driver load or during wedge? */ + if (unlikely(!guc_submission_initialized(guc) || + test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { + return; + } + + /* + * Technically possible for either of these values to be non-zero here, + * but very unlikely + harmless. Regardless let's add a warn so we can + * see in CI if this happens frequently / a precursor to taking down the + * machine. + */ + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); + atomic_set(&guc->outstanding_submission_g2h, 0); - ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&engine->sched_engine->tasklet.count)); + enable_submission(guc); } /* @@ -652,6 +956,9 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc, else trace_i915_request_guc_submit(rq); + if (unlikely(ret == -EPIPE)) + disable_submission(guc); + return ret; } @@ -664,7 +971,8 @@ static void guc_submit_request(struct i915_request *rq) /* Will be called from irq-context when using foreign fences. 
*/ spin_lock_irqsave(&sched_engine->lock, flags); - if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine)) + if (submission_disabled(guc) || guc->stalled_request || + !i915_sched_engine_is_empty(sched_engine)) queue_request(sched_engine, rq, rq_prio(rq)); else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) tasklet_hi_schedule(&sched_engine->tasklet); @@ -807,7 +1115,8 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) static int __guc_action_register_context(struct intel_guc *guc, u32 guc_id, - u32 offset) + u32 offset, + bool loop) { u32 action[] = { INTEL_GUC_ACTION_REGISTER_CONTEXT, @@ -816,10 +1125,10 @@ static int __guc_action_register_context(struct intel_guc *guc, }; return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), - 0, true); + 0, loop); } -static int register_context(struct intel_context *ce) +static int register_context(struct intel_context *ce, bool loop) { struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + @@ -827,11 +1136,12 @@ static int register_context(struct intel_context *ce) trace_intel_context_register(ce); - return __guc_action_register_context(guc, ce->guc_id, offset); + return __guc_action_register_context(guc, ce->guc_id, offset, loop); } static int __guc_action_deregister_context(struct intel_guc *guc, - u32 guc_id) + u32 guc_id, + bool loop) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_CONTEXT, @@ -840,16 +1150,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_DEREGISTER_CONTEXT, - true); + loop); } -static int deregister_context(struct intel_context *ce, u32 guc_id) +static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) { struct intel_guc *guc = ce_to_guc(ce); trace_intel_context_deregister(ce); - return __guc_action_deregister_context(guc, guc_id); + return __guc_action_deregister_context(guc, guc_id, loop); } static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) @@ -878,7 +1188,7 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US; } -static int guc_lrc_desc_pin(struct intel_context *ce) +static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; @@ -923,18 +1233,46 @@ static int guc_lrc_desc_pin(struct intel_context *ce) */ if (context_registered) { trace_intel_context_steal_guc_id(ce); - set_context_wait_for_deregister_to_register(ce); - intel_context_get(ce); + if (!loop) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + } else { + bool disabled; + unsigned long flags; + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ + } + } /* * If stealing the guc_id, this ce has the same guc_id as the * context whose guc_id was stolen. 
*/ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id); + ret = deregister_context(ce, ce->guc_id, loop); + if (unlikely(ret == -EBUSY)) { + clr_context_wait_for_deregister_to_register(ce); + intel_context_put(ce); + } else if (unlikely(ret == -ENODEV)) { + ret = 0; /* Will get registered later */ + } } else { with_intel_runtime_pm(runtime_pm, wakeref) - ret = register_context(ce); + ret = register_context(ce, loop); + if (unlikely(ret == -EBUSY)) + reset_lrc_desc(guc, desc_idx); + else if (unlikely(ret == -ENODEV)) + ret = 0; /* Will get registered later */ } return ret; @@ -1002,7 +1340,6 @@ static void __guc_context_sched_disable(struct intel_guc *guc, GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); trace_intel_context_sched_disable(ce); - intel_context_get(ce); guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); @@ -1014,6 +1351,7 @@ static u16 prep_context_pending_disable(struct intel_context *ce) set_context_pending_disable(ce); clr_context_enabled(ce); + intel_context_get(ce); return ce->guc_id; } @@ -1026,7 +1364,7 @@ static void guc_context_sched_disable(struct intel_context *ce) u16 guc_id; intel_wakeref_t wakeref; - if (context_guc_id_invalid(ce) || + if (submission_disabled(guc) || context_guc_id_invalid(ce) || !lrc_desc_registered(guc, ce->guc_id)) { clr_context_enabled(ce); goto unpin; @@ -1065,17 +1403,12 @@ unpin: static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - unsigned long flags; GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); GEM_BUG_ON(context_enabled(ce)); - spin_lock_irqsave(&ce->guc_state.lock, flags); - set_context_destroyed(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - - deregister_context(ce, ce->guc_id); + deregister_context(ce, ce->guc_id, true); } static void __guc_context_destroy(struct intel_context *ce) @@ -1103,16 +1436,18 @@ static void guc_context_destroy(struct kref *kref) struct intel_guc *guc = ce_to_guc(ce); intel_wakeref_t wakeref; unsigned long flags; + bool disabled; /* * If the guc_id is invalid this context has been stolen and we can free * it immediately. Also can be freed immediately if the context is not - * registered with the GuC. + * registered with the GuC or the GuC is in the middle of a reset. */ if (context_guc_id_invalid(ce)) { __guc_context_destroy(ce); return; - } else if (!lrc_desc_registered(guc, ce->guc_id)) { + } else if (submission_disabled(guc) || + !lrc_desc_registered(guc, ce->guc_id)) { release_guc_id(guc, ce); __guc_context_destroy(ce); return; @@ -1137,6 +1472,18 @@ static void guc_context_destroy(struct kref *kref) list_del_init(&ce->guc_id_link); spin_unlock_irqrestore(&guc->contexts_lock, flags); + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) + set_context_destroyed(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + /* * We defer GuC context deregistration until the context is destroyed * in order to save on CTBs. 
With this optimization ideally we only need @@ -1224,8 +1571,6 @@ static void guc_signal_context_fence(struct intel_context *ce) { unsigned long flags; - GEM_BUG_ON(!context_wait_for_deregister_to_register(ce)); - spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_wait_for_deregister_to_register(ce); __guc_signal_context_fence(ce); @@ -1234,8 +1579,9 @@ static void guc_signal_context_fence(struct intel_context *ce) static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { - return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id); + return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !submission_disabled(ce_to_guc(ce)); } static int guc_request_alloc(struct i915_request *rq) @@ -1293,8 +1639,12 @@ static int guc_request_alloc(struct i915_request *rq) if (unlikely(ret < 0)) return ret; if (context_needs_register(ce, !!ret)) { - ret = guc_lrc_desc_pin(ce); + ret = guc_lrc_desc_pin(ce, true); if (unlikely(ret)) { /* unwind */ + if (ret == -EPIPE) { + disable_submission(guc); + goto out; /* GPU will be reset */ + } atomic_dec(&ce->guc_id_ref); unpin_guc_id(guc, ce); return ret; @@ -1331,20 +1681,6 @@ out: return 0; } -static struct intel_engine_cs * -guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp, mask = ve->mask; - unsigned int num_siblings = 0; - - for_each_engine_masked(engine, ve->gt, mask, tmp) - if (num_siblings++ == sibling) - return engine; - - return NULL; -} - static int guc_virtual_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr) @@ -1540,7 +1876,7 @@ static inline void guc_kernel_context_pin(struct intel_guc *guc, { if (context_guc_id_invalid(ce)) pin_guc_id(guc, ce); - guc_lrc_desc_pin(ce); + guc_lrc_desc_pin(ce, true); } static inline void guc_init_lrc_mapping(struct intel_guc *guc) @@ -1600,10 +1936,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->sched_engine->schedule = i915_schedule; - engine->reset.prepare = guc_reset_prepare; - engine->reset.rewind = guc_reset_rewind; - engine->reset.cancel = guc_reset_cancel; - engine->reset.finish = guc_reset_finish; + engine->reset.prepare = guc_reset_nop; + engine->reset.rewind = guc_rewind_nop; + engine->reset.cancel = guc_reset_nop; + engine->reset.finish = guc_reset_nop; engine->emit_flush = gen8_emit_flush_xcs; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; @@ -1652,6 +1988,17 @@ static inline void guc_default_irqs(struct intel_engine_cs *engine) intel_engine_set_irq_handler(engine, cs_irq_handler); } +static void guc_sched_engine_destroy(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + struct intel_guc *guc = sched_engine->private_data; + + guc->sched_engine = NULL; + tasklet_kill(&sched_engine->tasklet); /* flush the callback */ + kfree(sched_engine); +} + int intel_guc_submission_setup(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1670,6 +2017,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) guc->sched_engine->schedule = i915_schedule; guc->sched_engine->private_data = guc; + guc->sched_engine->destroy = guc_sched_engine_destroy; tasklet_setup(&guc->sched_engine->tasklet, guc_submission_tasklet); } @@ -1776,7 +2124,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, * 
register this context. */ with_intel_runtime_pm(runtime_pm, wakeref) - register_context(ce); + register_context(ce, true); guc_signal_context_fence(ce); intel_context_put(ce); } else if (context_destroyed(ce)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6d8b9233214e..9a5ef1dca022 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -565,12 +565,48 @@ void intel_uc_reset_prepare(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - if (!intel_guc_is_ready(guc)) + /* Nothing to do if GuC isn't supported */ + if (!intel_uc_supports_guc(uc)) return; + /* Firmware expected to be running when this function is called */ + if (!intel_guc_is_ready(guc)) + goto sanitize; + + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset_prepare(guc); + +sanitize: __uc_sanitize(uc); } +void intel_uc_reset(struct intel_uc *uc, bool stalled) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware can not be running when this function is called */ + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset(guc, stalled); +} + +void intel_uc_reset_finish(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware expected to be running when this function is called */ + if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset_finish(guc); +} + +void intel_uc_cancel_requests(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware can not be running when this function is called */ + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_cancel_requests(guc); +} + void intel_uc_runtime_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index c4cef885e984..eaa3202192ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -37,6 +37,9 @@ void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); +void intel_uc_reset(struct intel_uc *uc, bool stalled); +void intel_uc_reset_finish(struct intel_uc *uc); +void intel_uc_cancel_requests(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); void intel_uc_runtime_suspend(struct intel_uc *uc); int intel_uc_resume(struct intel_uc *uc); -- cgit v1.2.3 From c41ee2873eb37ffff1b4185262b8efefc0c0d6e3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:24 -0700 Subject: drm/i915: Reset GPU immediately if submission is disabled If submission is disabled by the backend for any reason, reset the GPU immediately in the heartbeat code as the backend can't be reenabled until the GPU is reset. 
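As a rough sketch of the new heartbeat decision (a simplified stand-alone model with invented names, not the driver code): when the backend reports submission as disabled, the heartbeat skips its usual priority-bumping steps and asks for a reset straight away.

#include <stdbool.h>
#include <stdio.h>

struct sched_engine_model {
        bool (*disabled)(void);         /* backend hook: "is submission off?" */
};

static bool guc_backend_disabled(void)  { return true;  }  /* e.g. tasklet callback cleared */
static bool execlists_backend_ok(void)  { return false; }

static void heartbeat_tick(struct sched_engine_model *se)
{
        if (se->disabled()) {
                /* Backend cannot be re-enabled until the GPU is reset. */
                printf("stopped heartbeat: requesting reset now\n");
                return;
        }
        printf("heartbeat ok: bump priority or rearm the timer\n");
}

int main(void)
{
        struct sched_engine_model el  = { .disabled = execlists_backend_ok };
        struct sched_engine_model guc = { .disabled = guc_backend_disabled };

        heartbeat_tick(&el);
        heartbeat_tick(&guc);
        return 0;
}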
Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-10-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 62 ++++++++++++++++++----- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 9 ++++ drivers/gpu/drm/i915/i915_scheduler.c | 6 +++ drivers/gpu/drm/i915/i915_scheduler.h | 6 +++ drivers/gpu/drm/i915/i915_scheduler_types.h | 5 ++ 6 files changed, 79 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index b6a305e6a974..0b16f19c384e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -70,12 +70,30 @@ static void show_heartbeat(const struct i915_request *rq, { struct drm_printer p = drm_debug_printer("heartbeat"); - intel_engine_dump(engine, &p, - "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", - engine->name, - rq->fence.context, - rq->fence.seqno, - rq->sched.attr.priority); + if (!rq) { + intel_engine_dump(engine, &p, + "%s heartbeat not ticking\n", + engine->name); + } else { + intel_engine_dump(engine, &p, + "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", + engine->name, + rq->fence.context, + rq->fence.seqno, + rq->sched.attr.priority); + } +} + +static void +reset_engine(struct intel_engine_cs *engine, struct i915_request *rq) +{ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); } static void heartbeat(struct work_struct *wrk) @@ -102,6 +120,11 @@ static void heartbeat(struct work_struct *wrk) if (intel_gt_is_wedged(engine->gt)) goto out; + if (i915_sched_engine_disabled(engine->sched_engine)) { + reset_engine(engine, engine->heartbeat.systole); + goto out; + } + if (engine->heartbeat.systole) { long delay = READ_ONCE(engine->props.heartbeat_interval_ms); @@ -139,13 +162,7 @@ static void heartbeat(struct work_struct *wrk) engine->sched_engine->schedule(rq, &attr); local_bh_enable(); } else { - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - show_heartbeat(rq, engine); - - intel_gt_handle_error(engine->gt, engine->mask, - I915_ERROR_CAPTURE, - "stopped heartbeat on %s", - engine->name); + reset_engine(engine, rq); } rq->emitted_jiffies = jiffies; @@ -194,6 +211,25 @@ void intel_engine_park_heartbeat(struct intel_engine_cs *engine) i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } +void intel_gt_unpark_heartbeats(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + if (intel_engine_pm_is_awake(engine)) + intel_engine_unpark_heartbeat(engine); +} + +void intel_gt_park_heartbeats(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + intel_engine_park_heartbeat(engine); +} + void intel_engine_init_heartbeat(struct intel_engine_cs *engine) { INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h index a488ea3e84a3..5da6d809a87a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -7,6 +7,7 @@ #define INTEL_ENGINE_HEARTBEAT_H struct 
intel_engine_cs; +struct intel_gt; void intel_engine_init_heartbeat(struct intel_engine_cs *engine); @@ -16,6 +17,9 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine, void intel_engine_park_heartbeat(struct intel_engine_cs *engine); void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); +void intel_gt_park_heartbeats(struct intel_gt *gt); +void intel_gt_unpark_heartbeats(struct intel_gt *gt); + int intel_engine_pulse(struct intel_engine_cs *engine); int intel_engine_flush_barriers(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index be13a33ef17e..dd4ea1ef5731 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -10,6 +10,7 @@ #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" @@ -603,6 +604,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) return; } + intel_gt_park_heartbeats(guc_to_gt(guc)); disable_submission(guc); guc->interrupts.disable(guc); @@ -890,6 +892,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) atomic_set(&guc->outstanding_submission_g2h, 0); enable_submission(guc); + intel_gt_unpark_heartbeats(guc_to_gt(guc)); } /* @@ -1866,6 +1869,11 @@ static int guc_resume(struct intel_engine_cs *engine) return 0; } +static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) +{ + return !sched_engine->tasklet.callback; +} + static void guc_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = guc_submit_request; @@ -2016,6 +2024,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) return -ENOMEM; guc->sched_engine->schedule = i915_schedule; + guc->sched_engine->disabled = guc_sched_engine_disabled; guc->sched_engine->private_data = guc; guc->sched_engine->destroy = guc_sched_engine_destroy; tasklet_setup(&guc->sched_engine->tasklet, diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2804fd5b660b..28dd887eb1be 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -440,6 +440,11 @@ static void default_destroy(struct kref *kref) kfree(sched_engine); } +static bool default_disabled(struct i915_sched_engine *sched_engine) +{ + return false; +} + struct i915_sched_engine * i915_sched_engine_create(unsigned int subclass) { @@ -454,6 +459,7 @@ i915_sched_engine_create(unsigned int subclass) sched_engine->queue = RB_ROOT_CACHED; sched_engine->queue_priority_hint = INT_MIN; sched_engine->destroy = default_destroy; + sched_engine->disabled = default_disabled; INIT_LIST_HEAD(&sched_engine->requests); INIT_LIST_HEAD(&sched_engine->hold); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 3c9504e9f409..f4d9811ade5b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -96,4 +96,10 @@ void i915_request_show_with_schedule(struct drm_printer *m, const char *prefix, int indent); +static inline bool +i915_sched_engine_disabled(struct i915_sched_engine *sched_engine) +{ + return sched_engine->disabled(sched_engine); +} + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 
00384e2c5273..eaef233e9080 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -168,6 +168,11 @@ struct i915_sched_engine { */ void (*destroy)(struct kref *kref); + /** + * @disabled: check if backend has disabled submission + */ + bool (*disabled)(struct i915_sched_engine *sched_engine); + /** * @kick_backend: kick backend after a request's priority has changed */ -- cgit v1.2.3 From e5a1ad035938e60448cc0cd334359885c4fd3054 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:25 -0700 Subject: drm/i915/guc: Add disable interrupts to guc sanitize Add disable GuC interrupts to intel_guc_sanitize(). Part of this requires moving the guc_*_interrupt wrapper function into header file intel_guc.h. Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-11-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 16 ++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 21 +++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index a068e3262676..7b871f675e48 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -224,9 +224,25 @@ static inline bool intel_guc_is_ready(struct intel_guc *guc) return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(&guc->ct); } +static inline void intel_guc_reset_interrupts(struct intel_guc *guc) +{ + guc->interrupts.reset(guc); +} + +static inline void intel_guc_enable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.enable(guc); +} + +static inline void intel_guc_disable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.disable(guc); +} + static inline int intel_guc_sanitize(struct intel_guc *guc) { intel_uc_fw_sanitize(&guc->fw); + intel_guc_disable_interrupts(guc); intel_guc_ct_sanitize(&guc->ct); guc->mmio_msg = 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 9a5ef1dca022..05a2001afde9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -207,21 +207,6 @@ static void guc_handle_mmio_msg(struct intel_guc *guc) spin_unlock_irq(&guc->irq_lock); } -static void guc_reset_interrupts(struct intel_guc *guc) -{ - guc->interrupts.reset(guc); -} - -static void guc_enable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.enable(guc); -} - -static void guc_disable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.disable(guc); -} - static int guc_enable_communication(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -242,7 +227,7 @@ static int guc_enable_communication(struct intel_guc *guc) guc_get_mmio_msg(guc); guc_handle_mmio_msg(guc); - guc_enable_interrupts(guc); + intel_guc_enable_interrupts(guc); /* check for CT messages received before we enabled interrupts */ spin_lock_irq(>->irq_lock); @@ -265,7 +250,7 @@ static void guc_disable_communication(struct intel_guc *guc) */ guc_clear_mmio_msg(guc); - guc_disable_interrupts(guc); + intel_guc_disable_interrupts(guc); intel_guc_ct_disable(&guc->ct); @@ -463,7 +448,7 @@ static int __uc_init_hw(struct intel_uc *uc) if (ret) goto err_out; - guc_reset_interrupts(guc); + intel_guc_reset_interrupts(guc); /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ -- 
cgit v1.2.3 From cad46a332f3d0f0ffeb9f9069499fb19a2ab43f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:26 -0700 Subject: drm/i915/guc: Suspend/resume implementation for new interface The new GuC interface introduces an MMIO H2G command, INTEL_GUC_ACTION_RESET_CLIENT, which is used to implement suspend. This MMIO tears down any active contexts generating a context reset G2H CTB for each. Once that step completes the GuC tears down the CTB channels. It is safe to suspend once this MMIO H2G command completes and all G2H CTBs have been processed. In practice the i915 will likely never receive a G2H as suspend should only be called after the GPU is idle. Resume is implemented in the same manner as before - simply reload the GuC firmware and reinitialize everything (e.g. CTB channels, contexts, etc..). v2: (Michel / John H) - INTEL_GUC_ACTION_RESET_CLIENT 0x5B01 -> 0x5507 Cc: John Harrison Signed-off-by: Matthew Brost Signed-off-by: Michal Wajdeczko Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-12-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 64 +++++++++-------------- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 15 +++--- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h | 5 ++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++--- 5 files changed, 54 insertions(+), 51 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 57e18babdf4b..d832c8f11c11 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 9b09395b998f..68266cbffd1f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -524,51 +524,34 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) */ int intel_guc_suspend(struct intel_guc *guc) { - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; int ret; - u32 status; u32 action[] = { - INTEL_GUC_ACTION_ENTER_S_STATE, - GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + INTEL_GUC_ACTION_RESET_CLIENT, }; - /* - * If GuC communication is enabled but submission is not supported, - * we do not need to suspend the GuC. - */ - if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) + if (!intel_guc_is_ready(guc)) return 0; - /* - * The ENTER_S_STATE action queues the save/restore operation in GuC FW - * and then returns, so waiting on the H2G is not enough to guarantee - * GuC is done. When all the processing is done, GuC writes - * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll - * on that. Note that GuC does not ensure that the value in the register - * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is - * in progress so we need to take care of that ourselves as well. 
- */ - - intel_uncore_write(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK); - - ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); - if (ret) - return ret; - - ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK, - 0, 0, 10, &status); - if (ret) - return ret; - - if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) { - DRM_ERROR("GuC failed to change sleep state. " - "action=0x%x, err=%u\n", - action[0], status); - return -EIO; + if (intel_guc_submission_is_used(guc)) { + /* + * This H2G MMIO command tears down the GuC in two steps. First it will + * generate a G2H CTB for every active context indicating a reset. In + * practice the i915 shouldn't ever get a G2H as suspend should only be + * called when the GPU is idle. Next, it tears down the CTBs and this + * H2G MMIO command completes. + * + * Don't abort on a failure code from the GuC. Keep going and do the + * clean up in santize() and re-initialisation on resume and hopefully + * the error here won't be problematic. + */ + ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (ret) + DRM_ERROR("GuC suspend: RESET_CLIENT action failed with error %d!\n", ret); } + /* Signal that the GuC isn't running. */ + intel_guc_sanitize(guc); + return 0; } @@ -578,7 +561,12 @@ int intel_guc_suspend(struct intel_guc *guc) */ int intel_guc_resume(struct intel_guc *guc) { - /* XXX: to be implemented with submission interface rework */ + /* + * NB: This function can still be called even if GuC submission is + * disabled, e.g. if GuC is enabled for HuC authentication only. Thus, + * if any code is later added here, it must be support doing nothing + * if submission is disabled (as per intel_guc_suspend). + */ return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index dd4ea1ef5731..d39a312e980a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -302,10 +302,10 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, return err; } -static int guc_wait_for_pending_msg(struct intel_guc *guc, - atomic_t *wait_var, - bool interruptible, - long timeout) +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout) { const int state = interruptible ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; @@ -348,8 +348,9 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) return 0; - return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, - true, timeout); + return intel_guc_wait_for_pending_msg(guc, + &guc->outstanding_submission_g2h, + true, timeout); } static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); @@ -623,7 +624,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { intel_guc_to_host_event_handler(guc); #define wait_for_reset(guc, wait_var) \ - guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) + intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) do { wait_for_reset(guc, &guc->outstanding_submission_g2h); } while (!list_empty(&guc->ct.requests.incoming)); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 5f263ac4f46a..08ff77c5c50e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -28,6 +28,11 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout); + static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { /* XXX: GuC submission is unavailable for now */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 05a2001afde9..e07c989968f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -595,14 +595,18 @@ void intel_uc_cancel_requests(struct intel_uc *uc) void intel_uc_runtime_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - int err; if (!intel_guc_is_ready(guc)) return; - err = intel_guc_suspend(guc); - if (err) - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + /* + * Wait for any outstanding CTB before tearing down communication /w the + * GuC. + */ +#define OUTSTANDING_CTB_TIMEOUT_PERIOD (HZ / 5) + intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + false, OUTSTANDING_CTB_TIMEOUT_PERIOD); + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); guc_disable_communication(guc); } @@ -611,12 +615,16 @@ void intel_uc_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; intel_wakeref_t wakeref; + int err; if (!intel_guc_is_ready(guc)) return; - with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) - intel_uc_runtime_suspend(uc); + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + } } static int __uc_resume(struct intel_uc *uc, bool enable_communication) -- cgit v1.2.3 From 1e0fd2b5da1ed5f71985676dc55145dd58367d93 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:27 -0700 Subject: drm/i915/guc: Handle context reset notification GuC will issue a reset on detecting an engine hang and will notify the driver via a G2H message. The driver will service the notification by resetting the guilty context to a simple state or banning it completely. 
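A small stand-alone model of the notification flow described above (hypothetical names and a fixed-size array in place of the real context lookup): validate the one-word G2H payload, find the context by its descriptor index, and replay it; malformed or unknown messages are rejected with -EPROTO, matching the error handling visible in the diff below.

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define MAX_CONTEXTS 8

struct context_model {
        int registered;
        int replays;
};

static struct context_model contexts[MAX_CONTEXTS];

static int handle_context_reset(const uint32_t *msg, uint32_t len)
{
        uint32_t desc_idx;

        if (len != 1)                           /* malformed G2H message */
                return -EPROTO;

        desc_idx = msg[0];
        if (desc_idx >= MAX_CONTEXTS || !contexts[desc_idx].registered)
                return -EPROTO;                 /* unknown context */

        contexts[desc_idx].replays++;           /* reset state and resubmit */
        return 0;
}

int main(void)
{
        uint32_t msg[] = { 3 };

        contexts[3].registered = 1;
        printf("handle: %d, replays now %d\n",
               handle_context_reset(msg, 1), contexts[3].replays);
        return 0;
}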
v2: (John Harrison) - Move msg[0] lookup after length check v3: (John Harrison) - s/drm_dbg/drm_err Cc: Matthew Brost Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-13-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 3 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 36 +++++++++++++++++++++++ drivers/gpu/drm/i915/i915_trace.h | 10 +++++++ 4 files changed, 51 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 7b871f675e48..c79da154c16d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -269,6 +269,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_sched_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_context_reset_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); void intel_guc_submission_reset_prepare(struct intel_guc *guc); void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 8bb6b1bbcea1..231a42887675 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -984,6 +984,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: ret = intel_guc_sched_done_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: + ret = intel_guc_context_reset_process_msg(guc, payload, len); + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d39a312e980a..7be84b0761b5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2201,6 +2201,42 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, return 0; } +static void guc_context_replay(struct intel_context *ce) +{ + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + + __guc_reset_context(ce, true); + tasklet_hi_schedule(&sched_engine->tasklet); +} + +static void guc_handle_context_reset(struct intel_guc *guc, + struct intel_context *ce) +{ + trace_intel_context_reset(ce); + guc_context_replay(ce); +} + +int intel_guc_context_reset_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + struct intel_context *ce; + int desc_idx; + + if (unlikely(len != 1)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + desc_idx = msg[0]; + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + guc_handle_context_reset(guc, ce); + + return 0; +} + void intel_guc_submission_print_info(struct intel_guc *guc, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 68b70626c3e2..3f43d904f043 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -920,6 +920,11 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_sched_state_no_lock) ); +DEFINE_EVENT(intel_context, intel_context_reset, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + DEFINE_EVENT(intel_context, intel_context_register, 
TP_PROTO(struct intel_context *ce), TP_ARGS(ce) @@ -1007,6 +1012,11 @@ trace_i915_request_out(struct i915_request *rq) { } +static inline void +trace_intel_context_reset(struct intel_context *ce) +{ +} + static inline void trace_intel_context_register(struct intel_context *ce) { -- cgit v1.2.3 From f7957e603cbc12e18b5f616e5f0d18e8ba994a1d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:28 -0700 Subject: drm/i915/guc: Handle engine reset failure notification GuC will notify the driver, via G2H, if it fails to reset an engine. We recover by resorting to a full GPU reset. v2: (John Harrison): - s/drm_dbg/drm_err Signed-off-by: Matthew Brost Signed-off-by: Fernando Pacheco Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-14-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 3 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 43 +++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index c79da154c16d..9c8d53a32e0d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -271,6 +271,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_context_reset_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_engine_failure_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); void intel_guc_submission_reset_prepare(struct intel_guc *guc); void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 231a42887675..18917b443f05 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -987,6 +987,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: ret = intel_guc_context_reset_process_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: + ret = intel_guc_engine_failure_process_msg(guc, payload, len); + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7be84b0761b5..d76494e3e271 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2237,6 +2237,49 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, return 0; } +static struct intel_engine_cs * +guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) +{ + struct intel_gt *gt = guc_to_gt(guc); + u8 engine_class = guc_class_to_engine_class(guc_class); + + /* Class index is checked in class converter */ + GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); + + return gt->engine_class[engine_class][instance]; +} + +int intel_guc_engine_failure_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + struct intel_engine_cs *engine; + u8 guc_class, instance; + u32 reason; + + if (unlikely(len != 3)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + guc_class = msg[0]; + instance = msg[1]; + reason = msg[2]; + + engine = guc_lookup_engine(guc, guc_class, instance); + if (unlikely(!engine)) { + 
drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid engine %d:%d", guc_class, instance); + return -EPROTO; + } + + intel_gt_handle_error(guc_to_gt(guc), engine->mask, + I915_ERROR_CAPTURE, + "GuC failed to reset %s (reason=0x%08x)\n", + engine->name, reason); + + return 0; +} + void intel_guc_submission_print_info(struct intel_guc *guc, struct drm_printer *p) { -- cgit v1.2.3 From 933864af118166655ec5d1075f2bee0bb3bea95c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:29 -0700 Subject: drm/i915/guc: Enable the timer expired interrupt for GuC The GuC can implement execution quanta, detect hung contexts and other such things, but it requires the timer expired interrupt to do so. Signed-off-by: Matthew Brost CC: John Harrison Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-15-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 06e9a8ed4e03..0c8e7f2b06f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1877,6 +1877,10 @@ void intel_rps_init(struct intel_rps *rps) if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + + /* GuC needs ARAT expired interrupt unmasked */ + if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc)) + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; } void intel_rps_sanitize(struct intel_rps *rps) -- cgit v1.2.3 From 6de12da166783285c911c177d29e5db7dbafbb98 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:30 -0700 Subject: drm/i915/guc: Provide mmio list to be saved/restored on engine reset The driver must provide GuC with a list of mmio registers that should be saved/restored during a GuC-based engine reset. Unfortunately, the list must be dynamically allocated as its size is variable. That means the driver must generate the list twice - once to work out the size and a second time to actually save it.
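Generating the list twice is the standard two-pass sizing pattern: run the list builder once purely to learn how many entries it produces, reserve that much space, then run it again to fill the space in. A minimal, self-contained sketch of the pattern follows; the names and the register walk are purely illustrative, and the actual patch counts into a temporary kmalloc'd buffer and later regenerates the lists directly inside the ADS, but the idea is the same.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Pass 1: out == NULL, only count. Pass 2: out != NULL, also store. */
static size_t build_reg_list(uint32_t *out)
{
	size_t count = 0;
	uint32_t reg;

	/* Stand-in for walking workarounds, whitelist and MOCS registers. */
	for (reg = 0x2000; reg < 0x2040; reg += 4) {
		if (out)
			out[count] = reg;
		count++;
	}

	return count;
}

int main(void)
{
	size_t n = build_reg_list(NULL);	/* sizing pass */
	uint32_t *regs;

	regs = calloc(n, sizeof(*regs));
	if (!regs)
		return 1;

	build_reg_list(regs);			/* population pass */
	printf("saved %zu registers, first at 0x%x\n", n, (unsigned)regs[0]);
	free(regs);
	return 0;
}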
v2: (Alan / CI) - GEN7_GT_MODE -> GEN6_GT_MODE to fix WA selftest failure Signed-off-by: John Harrison Signed-off-by: Fernando Pacheco Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-16-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 46 ++--- drivers/gpu/drm/i915/gt/intel_workarounds_types.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 199 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 1 + 5 files changed, 222 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3aa5ce3cda8b..9173df59821a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -150,13 +150,14 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, - u32 clear, u32 set, u32 read_mask) + u32 clear, u32 set, u32 read_mask, bool masked_reg) { struct i915_wa wa = { .reg = reg, .clr = clear, .set = set, .read = read_mask, + .masked_reg = masked_reg, }; _wa_add(wal, &wa); @@ -165,7 +166,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { - wa_add(wal, reg, clear, set, clear); + wa_add(wal, reg, clear, set, clear, false); } static void @@ -200,20 +201,20 @@ wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); + wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); } static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); + wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { - wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask); + wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -533,10 +534,10 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); /* WaEnableFloatBlendOptimization:icl */ - wa_write_clr_set(wal, - GEN10_CACHE_MODE_SS, - 0, /* write-only, so skip validation */ - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), + 0 /* write-only, so skip validation */, + true); /* WaDisableGPGPUMidThreadPreemption:icl */ wa_masked_field_set(wal, GEN8_CS_CHICKEN1, @@ -581,7 +582,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128, - 0); + 0, false); } static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -619,7 +620,7 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, - 0); + 0, false); /* * Wa_14012131227:dg1 @@ -795,7 +796,7 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_add(wal, HSW_ROW_CHICKEN3, 0, 
_MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), - 0 /* XXX does this reg exist? */); + 0 /* XXX does this reg exist? */, true); /* WaVSRefCountFullforceMissDisable:hsw */ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); @@ -1824,10 +1825,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - wa_add(wal, GEN7_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); + wa_masked_field_set(wal, + GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); } if (IS_GRAPHICS_VER(i915, 6, 7)) @@ -1877,10 +1878,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - wa_add(wal, - GEN6_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); + wa_masked_field_set(wal, + GEN6_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); /* WaDisable_RenderCache_OperationalFlush:snb */ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); @@ -1901,7 +1902,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_add(wal, MI_MODE, 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), /* XXX bit doesn't stick on Broadwater */ - IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); + IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true); if (GRAPHICS_VER(i915) == 4) /* @@ -1916,7 +1917,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_add(wal, ECOSKPD, 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), - 0 /* XXX bit doesn't stick on Broadwater */); + 0 /* XXX bit doesn't stick on Broadwater */, + true); } static void diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index c214111ea367..1e873681795d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -15,6 +15,7 @@ struct i915_wa { u32 clr; u32 set; u32 read; + bool masked_reg; }; struct i915_wa_list { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 9c8d53a32e0d..f3c69160cb7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -59,6 +59,7 @@ struct intel_guc { struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; + u32 ads_regset_size; struct i915_vma *lrc_desc_pool; void *lrc_desc_pool_vaddr; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index b82145652d57..fdac8a7805f5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -3,6 +3,8 @@ * Copyright © 2014-2019 Intel Corporation */ +#include + #include "gt/intel_gt.h" #include "gt/intel_lrc.h" #include "intel_guc_ads.h" @@ -23,7 +25,12 @@ * | guc_policies | * +---------------------------------------+ * | guc_gt_system_info | - * +---------------------------------------+ + * +---------------------------------------+ <== static + * | guc_mmio_reg[countA] (engine 0.0) | + * | guc_mmio_reg[countB] (engine 0.1) | + * | guc_mmio_reg[countC] (engine 1.0) | + * | ... 
| + * +---------------------------------------+ <== dynamic * | padding | * +---------------------------------------+ <== 4K aligned * | private data | @@ -35,16 +42,33 @@ struct __guc_ads_blob { struct guc_ads ads; struct guc_policies policies; struct guc_gt_system_info system_info; + /* From here on, location is dynamic! Refer to above diagram. */ + struct guc_mmio_reg regset[0]; } __packed; +static u32 guc_ads_regset_size(struct intel_guc *guc) +{ + GEM_BUG_ON(!guc->ads_regset_size); + return guc->ads_regset_size; +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); } +static u32 guc_ads_regset_offset(struct intel_guc *guc) +{ + return offsetof(struct __guc_ads_blob, regset); +} + static u32 guc_ads_private_data_offset(struct intel_guc *guc) { - return PAGE_ALIGN(sizeof(struct __guc_ads_blob)); + u32 offset; + + offset = guc_ads_regset_offset(guc) + + guc_ads_regset_size(guc); + return PAGE_ALIGN(offset); } static u32 guc_ads_blob_size(struct intel_guc *guc) @@ -83,6 +107,165 @@ static void guc_mapping_table_init(struct intel_gt *gt, } } +/* + * The save/restore register list must be pre-calculated to a temporary + * buffer of driver defined size before it can be generated in place + * inside the ADS. + */ +#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ +struct temp_regset { + struct guc_mmio_reg *registers; + u32 used; + u32 size; +}; + +static int guc_mmio_reg_cmp(const void *a, const void *b) +{ + const struct guc_mmio_reg *ra = a; + const struct guc_mmio_reg *rb = b; + + return (int)ra->offset - (int)rb->offset; +} + +static void guc_mmio_reg_add(struct temp_regset *regset, + u32 offset, u32 flags) +{ + u32 count = regset->used; + struct guc_mmio_reg reg = { + .offset = offset, + .flags = flags, + }; + struct guc_mmio_reg *slot; + + GEM_BUG_ON(count >= regset->size); + + /* + * The mmio list is built using separate lists within the driver. + * It's possible that at some point we may attempt to add the same + * register more than once. Do not consider this an error; silently + * move on if the register is already in the list. + */ + if (bsearch(®, regset->registers, count, + sizeof(reg), guc_mmio_reg_cmp)) + return; + + slot = ®set->registers[count]; + regset->used++; + *slot = reg; + + while (slot-- > regset->registers) { + GEM_BUG_ON(slot[0].offset == slot[1].offset); + if (slot[1].offset > slot[0].offset) + break; + + swap(slot[1], slot[0]); + } +} + +#define GUC_MMIO_REG_ADD(regset, reg, masked) \ + guc_mmio_reg_add(regset, \ + i915_mmio_reg_offset((reg)), \ + (masked) ? GUC_REGSET_MASKED : 0) + +static void guc_mmio_regset_init(struct temp_regset *regset, + struct intel_engine_cs *engine) +{ + const u32 base = engine->mmio_base; + struct i915_wa_list *wal = &engine->wa_list; + struct i915_wa *wa; + unsigned int i; + + regset->used = 0; + + GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true); + GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); + GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); + + /* Be extra paranoid and include all whitelist registers. 
*/ + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) + GUC_MMIO_REG_ADD(regset, + RING_FORCE_TO_NONPRIV(base, i), + false); + + /* add in local MOCS registers */ + for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) + GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false); +} + +static int guc_mmio_reg_state_query(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct temp_regset temp_set; + u32 total; + + /* + * Need to actually build the list in order to filter out + * duplicates and other such data dependent constructions. + */ + temp_set.size = MAX_MMIO_REGS; + temp_set.registers = kmalloc_array(temp_set.size, + sizeof(*temp_set.registers), + GFP_KERNEL); + if (!temp_set.registers) + return -ENOMEM; + + total = 0; + for_each_engine(engine, gt, id) { + guc_mmio_regset_init(&temp_set, engine); + total += temp_set.used; + } + + kfree(temp_set.registers); + + return total * sizeof(struct guc_mmio_reg); +} + +static void guc_mmio_reg_state_init(struct intel_guc *guc, + struct __guc_ads_blob *blob) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct temp_regset temp_set; + struct guc_mmio_reg_set *ads_reg_set; + u32 addr_ggtt, offset; + u8 guc_class; + + offset = guc_ads_regset_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset); + temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]); + + for_each_engine(engine, gt, id) { + /* Class index is checked in class converter */ + GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS); + + guc_class = engine_class_to_guc_class(engine->class); + ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance]; + + guc_mmio_regset_init(&temp_set, engine); + if (!temp_set.used) { + ads_reg_set->address = 0; + ads_reg_set->count = 0; + continue; + } + + ads_reg_set->address = addr_ggtt; + ads_reg_set->count = temp_set.used; + + temp_set.size -= temp_set.used; + temp_set.registers += temp_set.used; + addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg); + } + + GEM_BUG_ON(temp_set.size); +} + /* * The first 80 dwords of the register state context, containing the * execlists and ppgtt registers. 
@@ -121,8 +304,7 @@ static void __guc_ads_init(struct intel_guc *guc) */ blob->ads.golden_context_lrca[guc_class] = 0; blob->ads.eng_state_size[guc_class] = - intel_engine_context_size(guc_to_gt(guc), - engine_class) - + intel_engine_context_size(gt, engine_class) - skipped_size; } @@ -153,6 +335,9 @@ static void __guc_ads_init(struct intel_guc *guc) blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); + /* MMIO save/restore list */ + guc_mmio_reg_state_init(guc, blob); + /* Private Data */ blob->ads.private_data = base + guc_ads_private_data_offset(guc); @@ -173,6 +358,12 @@ int intel_guc_ads_create(struct intel_guc *guc) GEM_BUG_ON(guc->ads_vma); + /* Need to calculate the reg state size dynamically: */ + ret = guc_mmio_reg_state_query(guc); + if (ret < 0) + return ret; + guc->ads_regset_size = ret; + size = guc_ads_blob_size(guc); ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 314194b419f0..5603377e06ca 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -12316,6 +12316,7 @@ enum skl_power_gate { /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +#define GEN9_LNCFCMOCS_REG_COUNT 32 #define __GEN9_RCS0_MOCS0 0xc800 #define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) -- cgit v1.2.3 From d75dc57fee98294944f14069fd686b451754627d Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:31 -0700 Subject: drm/i915/guc: Don't complain about reset races It is impossible to seal all race conditions of resets occurring concurrent to other operations. At least, not without introducing excesive mutex locking. Instead, don't complain if it occurs. In particular, don't complain if trying to send a H2G during a reset. Whatever the H2G was about should get redone once the reset is over. 
Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-17-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 5 ++++- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 ++++ drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 18917b443f05..22b4733b55e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -760,7 +760,10 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, int ret; if (unlikely(!ct->enabled)) { - WARN(1, "Unexpected send: action=%#x\n", *action); + struct intel_guc *guc = ct_to_guc(ct); + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); + + WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", *action); return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index e07c989968f9..77c1fe2ed883 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -550,6 +550,8 @@ void intel_uc_reset_prepare(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; + uc->reset_in_progress = true; + /* Nothing to do if GuC isn't supported */ if (!intel_uc_supports_guc(uc)) return; @@ -578,6 +580,8 @@ void intel_uc_reset_finish(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; + uc->reset_in_progress = false; + /* Firmware expected to be running when this function is called */ if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) intel_guc_submission_reset_finish(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index eaa3202192ac..91315e3f1c58 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -30,6 +30,8 @@ struct intel_uc { /* Snapshot of GuC log from last failed load */ struct drm_i915_gem_object *load_err_log; + + bool reset_in_progress; }; void intel_uc_init_early(struct intel_uc *uc); -- cgit v1.2.3 From c17b637928f030caac2d1c737959b9627011ac49 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:32 -0700 Subject: drm/i915/guc: Enable GuC engine reset Clear the 'disable resets' flag to allow GuC to reset hung contexts (detected via pre-emption timeout). Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-18-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index fdac8a7805f5..97cfaaaf2397 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -81,8 +81,7 @@ static void guc_policies_init(struct guc_policies *policies) { policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; - /* Disable automatic resets as not yet supported. 
*/ - policies->global_flags = GLOBAL_POLICY_DISABLE_ENGINE_RESET; + policies->global_flags = 0; policies->is_valid = 1; } -- cgit v1.2.3 From 573ba126aef37c8315e5bb68d2dad515efa96994 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:33 -0700 Subject: drm/i915/guc: Capture error state on context reset We receive notification of an engine reset from GuC at its completion. Meaning GuC has potentially cleared any HW state we may have been interested in capturing. GuC resumes scheduling on the engine post-reset, as the resets are meant to be transparent, further muddling our error state. There is ongoing work to define an API for a GuC debug state dump. The suggestion for now is to manually disable FW initiated resets in cases where debug state is needed. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-19-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 20 +++++++++++++ drivers/gpu/drm/i915/gt/intel_context.h | 3 ++ drivers/gpu/drm/i915/gt/intel_engine.h | 21 +++++++++++++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++++-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 35 +++++++++++------------ drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++--- 7 files changed, 91 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 0bf4a13e9759..237b70e98744 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -509,6 +509,26 @@ retry: return rq; } +struct i915_request *intel_context_find_active_request(struct intel_context *ce) +{ + struct i915_request *rq, *active = NULL; + unsigned long flags; + + GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); + + spin_lock_irqsave(&ce->guc_active.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + + return active; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 974ef85320c2..2ed9bf5f91a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -200,6 +200,9 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce); +struct i915_request * +intel_context_find_active_request(struct intel_context *ce); + static inline bool intel_context_is_barrier(const struct intel_context *ce) { return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 8fc76dc8bf98..1db2d3efc71f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -245,7 +245,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now); struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine); +intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine); u32 intel_engine_context_size(struct intel_gt *gt, u8 class); struct intel_context * @@ -313,4 +313,23 @@ intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int 
sibling) return engine->cops->get_sibling(engine, sibling); } +static inline void +intel_engine_set_hung_context(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + engine->hung_ce = ce; +} + +static inline void +intel_engine_clear_hung_context(struct intel_engine_cs *engine) +{ + intel_engine_set_hung_context(engine, NULL); +} + +static inline struct intel_context * +intel_engine_get_hung_context(struct intel_engine_cs *engine) +{ + return engine->hung_ce; +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1eaa658507e1..0da7868c5a13 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1731,7 +1731,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); spin_lock_irqsave(&engine->sched_engine->lock, flags); - rq = intel_engine_find_active_request(engine); + rq = intel_engine_execlist_find_hung_request(engine); if (rq) { struct intel_timeline *tl = get_timeline(rq); @@ -1842,10 +1842,17 @@ static bool match_ring(struct i915_request *rq) } struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine) +intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; + /* + * This search does not work in GuC submission mode. However, the GuC + * will report the hanging context directly to the driver itself. So + * the driver should never get here when in GuC mode. + */ + GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc)); + /* * We are called by the error capture, reset and to dump engine * state at random points in time. In particular, note that neither is diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 1c7e2724cdae..260cce15cb62 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -298,6 +298,8 @@ struct intel_engine_cs { /* keep a request in reserve for a [pm] barrier under oom */ struct i915_request *request_pool; + struct intel_context *hung_ce; + struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d76494e3e271..7d6ca0d54f9e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -726,24 +726,6 @@ __unwind_incomplete_requests(struct intel_context *ce) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static struct i915_request *context_find_active_request(struct intel_context *ce) -{ - struct i915_request *rq, *active = NULL; - unsigned long flags; - - spin_lock_irqsave(&ce->guc_active.lock, flags); - list_for_each_entry_reverse(rq, &ce->guc_active.requests, - sched.link) { - if (i915_request_completed(rq)) - break; - - active = rq; - } - spin_unlock_irqrestore(&ce->guc_active.lock, flags); - - return active; -} - static void __guc_reset_context(struct intel_context *ce, bool stalled) { struct i915_request *rq; @@ -757,7 +739,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) */ clr_context_enabled(ce); - rq = context_find_active_request(ce); + rq = intel_context_find_active_request(ce); if (!rq) { head = ce->ring->tail; stalled = false; @@ -2201,6 +2183,20 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, return 0; } +static void 
capture_error_state(struct intel_guc *guc, + struct intel_context *ce) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine = __context_to_physical_engine(ce); + intel_wakeref_t wakeref; + + intel_engine_set_hung_context(engine, ce); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + i915_capture_error_state(gt, engine->mask); + atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); +} + static void guc_context_replay(struct intel_context *ce) { struct i915_sched_engine *sched_engine = ce->engine->sched_engine; @@ -2213,6 +2209,7 @@ static void guc_handle_context_reset(struct intel_guc *guc, struct intel_context *ce) { trace_intel_context_reset(ce); + capture_error_state(guc, ce); guc_context_replay(ce); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a2c58b54a592..0f08bcfbe964 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1429,20 +1429,37 @@ capture_engine(struct intel_engine_cs *engine, { struct intel_engine_capture_vma *capture = NULL; struct intel_engine_coredump *ee; - struct i915_request *rq; + struct intel_context *ce; + struct i915_request *rq = NULL; unsigned long flags; ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); if (!ee) return NULL; - spin_lock_irqsave(&engine->sched_engine->lock, flags); - rq = intel_engine_find_active_request(engine); + ce = intel_engine_get_hung_context(engine); + if (ce) { + intel_engine_clear_hung_context(engine); + rq = intel_context_find_active_request(ce); + if (!rq || !i915_request_started(rq)) + goto no_request_capture; + } else { + /* + * Getting here with GuC enabled means it is a forced error capture + * with no actual hang. So, no need to attempt the execlist search. + */ + if (!intel_uc_uses_guc_submission(&engine->gt->uc)) { + spin_lock_irqsave(&engine->sched_engine->lock, flags); + rq = intel_engine_execlist_find_hung_request(engine); + spin_unlock_irqrestore(&engine->sched_engine->lock, + flags); + } + } if (rq) capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); - spin_unlock_irqrestore(&engine->sched_engine->lock, flags); if (!capture) { +no_request_capture: kfree(ee); return NULL; } -- cgit v1.2.3 From dc0dad365c5ed8bd7e2e506e84d2099624247ca4 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:34 -0700 Subject: drm/i915/guc: Fix for error capture after full GPU reset with GuC In the case of a full GPU reset (e.g. because GuC has died or because GuC's hang detection has been disabled), the driver can't rely on GuC reporting the guilty context. Instead, the driver needs to scan all active contexts and find one that is currently executing, as per the execlist mode behaviour. In GuC mode, this scan is different to execlist mode as the active request list is handled very differently. Similarly, the request state dump in debugfs needs to be handled differently when in GuC submission mode. Also refactured some of the request scanning code to avoid duplication across the multiple code paths that are now replicating it. 
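The scan described above boils down to classifying each request on a context and picking the first one that has started executing but not yet completed. Below is a small, self-contained sketch of that classification; the struct and the breadcrumb flags are illustrative only. The real code is intel_guc_find_hung_context() and i915_test_request_state() in the diff that follows, where "active" additionally requires that the engine's RING_START matches the request's ring.

#include <stdio.h>
#include <stddef.h>

enum req_state { REQ_COMPLETE, REQ_PENDING, REQ_ACTIVE };

struct req {
	int started;	/* breadcrumb: execution has begun */
	int completed;	/* breadcrumb: execution has finished */
};

static enum req_state req_state(const struct req *r)
{
	if (r->completed)
		return REQ_COMPLETE;
	if (!r->started)
		return REQ_PENDING;
	return REQ_ACTIVE;
}

/* Return the first request that is actually executing, or NULL. */
static const struct req *find_hung(const struct req *reqs, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (req_state(&reqs[i]) == REQ_ACTIVE)
			return &reqs[i];

	return NULL;
}

int main(void)
{
	struct req reqs[] = {
		{ .started = 1, .completed = 1 },	/* already retired */
		{ .started = 1, .completed = 0 },	/* the hung one */
		{ .started = 0, .completed = 0 },	/* still queued */
	};
	const struct req *hung = find_hung(reqs, 3);

	printf("hung request index: %d\n", hung ? (int)(hung - reqs) : -1);
	return 0;
}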
Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-20-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 3 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 140 +++++++++++++++------- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 8 ++ drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 67 +++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h | 3 + drivers/gpu/drm/i915/i915_request.c | 41 +++++++ drivers/gpu/drm/i915/i915_request.h | 10 ++ 9 files changed, 229 insertions(+), 47 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 1db2d3efc71f..c2a5640ae055 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -240,6 +240,9 @@ __printf(3, 4) void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); +void intel_engine_dump_active_requests(struct list_head *requests, + struct i915_request *hung_rq, + struct drm_printer *m); ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0da7868c5a13..dea0e522c5c7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1684,6 +1684,98 @@ static void print_properties(struct intel_engine_cs *engine, read_ul(&engine->defaults, p->offset)); } +static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg) +{ + struct intel_timeline *tl = get_timeline(rq); + + i915_request_show(m, rq, msg, 0); + + drm_printf(m, "\t\tring->start: 0x%08x\n", + i915_ggtt_offset(rq->ring->vma)); + drm_printf(m, "\t\tring->head: 0x%08x\n", + rq->ring->head); + drm_printf(m, "\t\tring->tail: 0x%08x\n", + rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } + + print_request_ring(m, rq); + + if (rq->context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); + } +} + +void intel_engine_dump_active_requests(struct list_head *requests, + struct i915_request *hung_rq, + struct drm_printer *m) +{ + struct i915_request *rq; + const char *msg; + enum i915_request_state state; + + list_for_each_entry(rq, requests, sched.link) { + if (rq == hung_rq) + continue; + + state = i915_test_request_state(rq); + if (state < I915_REQUEST_QUEUED) + continue; + + if (state == I915_REQUEST_ACTIVE) + msg = "\t\tactive on engine"; + else + msg = "\t\tactive in queue"; + + engine_dump_request(rq, m, msg); + } +} + +static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m) +{ + struct i915_request *hung_rq = NULL; + struct intel_context *ce; + bool guc; + + /* + * No need for an engine->irq_seqno_barrier() before the seqno reads. + * The GPU is still running so requests are still executing and any + * hardware reads will be out of date by the time they are reported. + * But the intention here is just to report an instantaneous snapshot + * so that's fine. 
+ */ + lockdep_assert_held(&engine->sched_engine->lock); + + drm_printf(m, "\tRequests:\n"); + + guc = intel_uc_uses_guc_submission(&engine->gt->uc); + if (guc) { + ce = intel_engine_get_hung_context(engine); + if (ce) + hung_rq = intel_context_find_active_request(ce); + } else { + hung_rq = intel_engine_execlist_find_hung_request(engine); + } + + if (hung_rq) + engine_dump_request(hung_rq, m, "\t\thung"); + + if (guc) + intel_guc_dump_active_requests(engine, hung_rq, m); + else + intel_engine_dump_active_requests(&engine->sched_engine->requests, + hung_rq, m); +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) @@ -1728,39 +1820,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_count(error)); print_properties(engine, m); - drm_printf(m, "\tRequests:\n"); - spin_lock_irqsave(&engine->sched_engine->lock, flags); - rq = intel_engine_execlist_find_hung_request(engine); - if (rq) { - struct intel_timeline *tl = get_timeline(rq); + engine_dump_active_requests(engine, m); - i915_request_show(m, rq, "\t\tactive ", 0); - - drm_printf(m, "\t\tring->start: 0x%08x\n", - i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", - rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", - rq->ring->tail); - drm_printf(m, "\t\tring->emit: 0x%08x\n", - rq->ring->emit); - drm_printf(m, "\t\tring->space: 0x%08x\n", - rq->ring->space); - - if (tl) { - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - tl->hwsp_offset); - intel_timeline_put(tl); - } - - print_request_ring(m, rq); - - if (rq->context->lrc_reg_state) { - drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); - } - } drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->sched_engine->hold)); spin_unlock_irqrestore(&engine->sched_engine->lock, flags); @@ -1834,13 +1896,6 @@ intel_engine_create_virtual(struct intel_engine_cs **siblings, return siblings[0]->cops->create_virtual(siblings, count); } -static bool match_ring(struct i915_request *rq) -{ - u32 ring = ENGINE_READ(rq->engine, RING_START); - - return ring == i915_ggtt_offset(rq->ring->vma); -} - struct i915_request * intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) { @@ -1884,14 +1939,7 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->sched_engine->requests, sched.link) { - if (__i915_request_is_complete(request)) - continue; - - if (!__i915_request_has_started(request)) - continue; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) + if (i915_test_request_state(request) != I915_REQUEST_ACTIVE) continue; active = request; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 0b16f19c384e..74775ae961b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -90,6 +90,14 @@ reset_engine(struct intel_engine_cs *engine, struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) show_heartbeat(rq, engine); + if (intel_engine_uses_guc(engine)) + /* + * GuC itself is toast or GuC's hang detection + * is disabled. Either way, need to find the + * hang culprit manually. 
+ */ + intel_guc_find_hung_context(engine); + intel_gt_handle_error(engine->gt, engine->mask, I915_ERROR_CAPTURE, "stopped heartbeat on %s", diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 721a10e2215e..4d281bc8a38c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -156,7 +156,7 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) if (guilty) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); - if (mark_guilty(rq)) + if (mark_guilty(rq) && !intel_engine_uses_guc(rq->engine)) skip_context(rq); } else { i915_request_set_error_once(rq, -EAGAIN); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f3c69160cb7e..f355a70bbec4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -275,6 +275,8 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +void intel_guc_find_hung_context(struct intel_engine_cs *engine); + void intel_guc_submission_reset_prepare(struct intel_guc *guc); void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); void intel_guc_submission_reset_finish(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7d6ca0d54f9e..76c2d927e1c3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2277,6 +2277,73 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, return 0; } +void intel_guc_find_hung_context(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_context *ce; + struct i915_request *rq; + unsigned long index; + + /* Reset called during driver load? GuC not yet initialised! */ + if (unlikely(!guc_submission_initialized(guc))) + return; + + xa_for_each(&guc->context_lookup, index, ce) { + if (!intel_context_is_pinned(ce)) + continue; + + if (intel_engine_is_virtual(ce->engine)) { + if (!(ce->engine->mask & engine->mask)) + continue; + } else { + if (ce->engine != engine) + continue; + } + + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) + continue; + + intel_engine_set_hung_context(engine, ce); + + /* Can only cope with one hang at a time... */ + return; + } + } +} + +void intel_guc_dump_active_requests(struct intel_engine_cs *engine, + struct i915_request *hung_rq, + struct drm_printer *m) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_context *ce; + unsigned long index; + unsigned long flags; + + /* Reset called during driver load? GuC not yet initialised! 
*/ + if (unlikely(!guc_submission_initialized(guc))) + return; + + xa_for_each(&guc->context_lookup, index, ce) { + if (!intel_context_is_pinned(ce)) + continue; + + if (intel_engine_is_virtual(ce->engine)) { + if (!(ce->engine->mask & engine->mask)) + continue; + } else { + if (ce->engine != engine) + continue; + } + + spin_lock_irqsave(&ce->guc_active.lock, flags); + intel_engine_dump_active_requests(&ce->guc_active.requests, + hung_rq, m); + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + } +} + void intel_guc_submission_print_info(struct intel_guc *guc, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 08ff77c5c50e..03bc1c83a4d2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -25,6 +25,9 @@ void intel_guc_submission_print_info(struct intel_guc *guc, struct drm_printer *p); void intel_guc_submission_print_context_info(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_dump_active_requests(struct intel_engine_cs *engine, + struct i915_request *hung_rq, + struct drm_printer *m); bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index aeef45679897..28f38b02a5d2 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -2041,6 +2041,47 @@ void i915_request_show(struct drm_printer *m, name); } +static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq) +{ + u32 ring = ENGINE_READ(engine, RING_START); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + +static bool match_ring(struct i915_request *rq) +{ + struct intel_engine_cs *engine; + bool found; + int i; + + if (!intel_engine_is_virtual(rq->engine)) + return engine_match_ring(rq->engine, rq); + + found = false; + i = 0; + while ((engine = intel_engine_get_sibling(rq->engine, i++))) { + found = engine_match_ring(engine, rq); + if (found) + break; + } + + return found; +} + +enum i915_request_state i915_test_request_state(struct i915_request *rq) +{ + if (i915_request_completed(rq)) + return I915_REQUEST_COMPLETE; + + if (!i915_request_started(rq)) + return I915_REQUEST_PENDING; + + if (match_ring(rq)) + return I915_REQUEST_ACTIVE; + + return I915_REQUEST_QUEUED; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 128030f43bbf..ac0e3326c067 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -649,4 +649,14 @@ i915_request_active_engine(struct i915_request *rq, void i915_request_notify_execute_cb_imm(struct i915_request *rq); +enum i915_request_state { + I915_REQUEST_UNKNOWN = 0, + I915_REQUEST_COMPLETE, + I915_REQUEST_PENDING, + I915_REQUEST_QUEUED, + I915_REQUEST_ACTIVE, +}; + +enum i915_request_state i915_test_request_state(struct i915_request *rq); + #endif /* I915_REQUEST_H */ -- cgit v1.2.3 From 7935785240508c738002accfdac07c398dd77abf Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:35 -0700 Subject: drm/i915/guc: Hook GuC scheduling policies up Use the official driver default scheduling policies for configuring the GuC scheduler rather than a bunch of hardcoded values. 
v2: (Matthew Brost) - Move I915_ENGINE_WANT_FORCED_PREEMPTION to later patch Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Cc: Jose Souza Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-21-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 44 +++++++++++++++++++++-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +++-- 3 files changed, 49 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f355a70bbec4..8c8cf842c29a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -277,6 +277,8 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, void intel_guc_find_hung_context(struct intel_engine_cs *engine); +int intel_guc_global_policies_update(struct intel_guc *guc); + void intel_guc_submission_reset_prepare(struct intel_guc *guc); void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); void intel_guc_submission_reset_finish(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 97cfaaaf2397..60b73625f686 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -77,14 +77,54 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) guc_ads_private_data_size(guc); } -static void guc_policies_init(struct guc_policies *policies) +static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies) { + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; + policies->global_flags = 0; + if (i915->params.reset < 2) + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + policies->is_valid = 1; } +static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, + policy_offset + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +int intel_guc_global_policies_update(struct intel_guc *guc) +{ + struct __guc_ads_blob *blob = guc->ads_blob; + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int ret; + + if (!blob) + return -EOPNOTSUPP; + + GEM_BUG_ON(!blob->ads.scheduler_policies); + + guc_policies_init(guc, &blob->policies); + + if (!intel_guc_is_ready(guc)) + return 0; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + ret = guc_action_policies_update(guc, blob->ads.scheduler_policies); + + return ret; +} + static void guc_mapping_table_init(struct intel_gt *gt, struct guc_gt_system_info *system_info) { @@ -281,7 +321,7 @@ static void __guc_ads_init(struct intel_guc *guc) u8 engine_class, guc_class; /* GuC scheduling policies */ - guc_policies_init(&blob->policies); + guc_policies_init(guc, &blob->policies); /* * GuC expects a per-engine-class context image and size diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 76c2d927e1c3..cea3e3073a71 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -874,6 +874,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) 
GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); atomic_set(&guc->outstanding_submission_g2h, 0); + intel_guc_global_policies_update(guc); enable_submission(guc); intel_gt_unpark_heartbeats(guc_to_gt(guc)); } @@ -1170,8 +1171,9 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, { desc->policy_flags = 0; - desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US; - desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US; + /* NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) @@ -1942,13 +1944,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = guc_set_default_submission; engine->flags |= I915_ENGINE_HAS_PREEMPTION; + engine->flags |= I915_ENGINE_HAS_TIMESLICES; /* * TODO: GuC supports timeslicing and semaphores as well, but they're * handled by the firmware so some minor tweaks are required before * enabling. * - * engine->flags |= I915_ENGINE_HAS_TIMESLICES; * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; */ -- cgit v1.2.3 From cb6cc815868ca27e14eef17eedd27e5f7dd99620 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:36 -0700 Subject: drm/i915/guc: Connect reset modparam updates to GuC policy flags Changing the reset module parameter has no effect on a running GuC. The corresponding entry in the ADS must be updated and then the GuC informed via a Host2GuC message. The new debugfs interface to module parameters allows this to happen. However, connecting the parameter data address back to anything useful is messy. One option would be to pass a new private data structure address through instead of just the parameter pointer. However, that means having a new (and different) data structure for each parameter and a new (and different) write function for each parameter. This method keeps everything generic by instead using a string lookup on the directory entry name. 
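The messiness referred to above is that the debugfs write callback only receives a pointer to the bare parameter value, so the patch walks back to the owning structures with container_of() and decides which parameter it is looking at from the name of the debugfs directory entry. The following is a self-contained sketch of just the container_of() recovery step; the macro is spelled out locally for illustration and the struct names are stand-ins, not i915's.

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct params {
	int reset;
	int enable_guc;
};

struct device_priv {
	const char *name;
	struct params params;
};

/* The write handler only gets &params->reset; walk back to the device. */
static void on_param_write(int *value)
{
	struct params *p = container_of(value, struct params, reset);
	struct device_priv *dev = container_of(p, struct device_priv, params);

	printf("param 'reset' of %s is now %d\n", dev->name, *value);
}

int main(void)
{
	struct device_priv dev = {
		.name = "card0",
		.params = { .reset = 2, .enable_guc = 3 },
	};

	dev.params.reset = 1;
	on_param_write(&dev.params.reset);
	return 0;
}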
Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-22-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- drivers/gpu/drm/i915/i915_debugfs_params.c | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 60b73625f686..7797766c56a9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -99,7 +99,7 @@ static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) policy_offset }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); } int intel_guc_global_policies_update(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 4e2b077692cb..20424275d41e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -6,9 +6,21 @@ #include #include "i915_debugfs_params.h" +#include "gt/intel_gt.h" +#include "gt/uc/intel_guc.h" #include "i915_drv.h" #include "i915_params.h" +#define MATCH_DEBUGFS_NODE_NAME(_file, _name) \ + (strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0) + +#define GET_I915(i915, name, ptr) \ + do { \ + struct i915_params *params; \ + params = container_of(((void *)(ptr)), typeof(*params), name); \ + (i915) = container_of(params, typeof(*(i915)), params); \ + } while (0) + /* int param */ static int i915_param_int_show(struct seq_file *m, void *data) { @@ -24,6 +36,16 @@ static int i915_param_int_open(struct inode *inode, struct file *file) return single_open(file, i915_param_int_show, inode->i_private); } +static int notify_guc(struct drm_i915_private *i915) +{ + int ret = 0; + + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + ret = intel_guc_global_policies_update(&i915->gt.uc.guc); + + return ret; +} + static ssize_t i915_param_int_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) @@ -81,8 +103,10 @@ static ssize_t i915_param_uint_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { + struct drm_i915_private *i915; struct seq_file *m = file->private_data; unsigned int *value = m->private; + unsigned int old = *value; int ret; ret = kstrtouint_from_user(ubuf, len, 0, value); @@ -95,6 +119,14 @@ static ssize_t i915_param_uint_write(struct file *file, *value = b; } + if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) { + GET_I915(i915, reset, value); + + ret = notify_guc(i915); + if (ret) + *value = old; + } + return ret ?: len; } -- cgit v1.2.3 From 731c2ad5e1f812ef91113f1c118a0c7252f17ef6 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:37 -0700 Subject: drm/i915/guc: Include scheduling policies in the debugfs state dump Added the scheduling policy parameters to the 'guc_info' debugfs state dump. 
Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-23-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 14 ++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 3 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 2 ++ 3 files changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 7797766c56a9..51fc14ee79cd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -92,6 +92,20 @@ static void guc_policies_init(struct intel_guc *guc, struct guc_policies *polici policies->is_valid = 1; } +void intel_guc_ads_print_policy_info(struct intel_guc *guc, + struct drm_printer *dp) +{ + struct __guc_ads_blob *blob = guc->ads_blob; + + if (unlikely(!blob)) + return; + + drm_printf(dp, "Global scheduling policies:\n"); + drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time); + drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items); + drm_printf(dp, " Flags = %u\n", blob->policies.global_flags); +} + static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) { u32 action[] = { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index b00d3ae1113a..bdcb339a5321 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -7,9 +7,12 @@ #define _INTEL_GUC_ADS_H_ struct intel_guc; +struct drm_printer; int intel_guc_ads_create(struct intel_guc *guc); void intel_guc_ads_destroy(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); +void intel_guc_ads_print_policy_info(struct intel_guc *guc, + struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 7a454c91a736..72ddfff42f7d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -10,6 +10,7 @@ #include "intel_guc_debugfs.h" #include "intel_guc_log_debugfs.h" #include "gt/uc/intel_guc_ct.h" +#include "gt/uc/intel_guc_ads.h" #include "gt/uc/intel_guc_submission.h" static int guc_info_show(struct seq_file *m, void *data) @@ -29,6 +30,7 @@ static int guc_info_show(struct seq_file *m, void *data) intel_guc_ct_print_info(&guc->ct, &p); intel_guc_submission_print_info(guc, &p); + intel_guc_ads_print_policy_info(guc, &p); return 0; } -- cgit v1.2.3 From 481d458caede241607e8463b9920ff9e29cece38 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:38 -0700 Subject: drm/i915/guc: Add golden context to GuC ADS The media watchdog mechanism involves GuC doing a silent reset and continue of the hung context. This requires the i915 driver provide a golden context to GuC in the ADS. 
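In outline, the change uses a two-phase scheme (the sketch below is illustrative only, with error handling and the other ADS sections omitted; the real code is in the diff that follows): the golden context area has to be sized before the ADS buffer is allocated, but the default register state only exists once the engines have recorded it, so the contents are filled in during a new late-init step.

static int sketch_ads_create(struct intel_guc *guc)
{
	int ret;

	/* Pass 1: blob == NULL, only returns the space needed. */
	ret = guc_prep_golden_context(guc, NULL);
	if (ret < 0)
		return ret;
	guc->ads_golden_ctxt_size = ret;

	/* ... allocate and map guc->ads_vma / guc->ads_blob at the total size ... */

	/* Pass 2: reserve the area and point the GuC's ADS entries at it. */
	guc_prep_golden_context(guc, guc->ads_blob);
	return 0;
}

static void sketch_ads_init_late(struct intel_guc *guc)
{
	/* Engines have recorded their defaults by now; copy the images in. */
	guc_init_golden_context(guc);
}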
v2: (Matthew Brost): - Fix memory corruption in shmem_read (John H) - Use locals rather than defines for LR_* + SKIP_SIZE Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-24-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 5 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 216 +++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 5 + drivers/gpu/drm/i915/gt/uc/intel_uc.h | 1 + 7 files changed, 202 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 46441607d18b..a64aa43f7cd9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -654,6 +654,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; + intel_uc_init_late(>->uc); + err = i915_inject_probe_error(gt->i915, -EIO); if (err) goto err_gt; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 68266cbffd1f..979128e28372 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -180,6 +180,11 @@ void intel_guc_init_early(struct intel_guc *guc) } } +void intel_guc_init_late(struct intel_guc *guc) +{ + intel_guc_ads_init_late(guc); +} + static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8c8cf842c29a..1875303c3bca 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -60,6 +60,7 @@ struct intel_guc { struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; u32 ads_regset_size; + u32 ads_golden_ctxt_size; struct i915_vma *lrc_desc_pool; void *lrc_desc_pool_vaddr; @@ -183,6 +184,7 @@ static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, } void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_late(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_write_params(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 51fc14ee79cd..6926919bcac6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -7,6 +7,7 @@ #include "gt/intel_gt.h" #include "gt/intel_lrc.h" +#include "gt/shmem_utils.h" #include "intel_guc_ads.h" #include "intel_guc_fwif.h" #include "intel_uc.h" @@ -33,6 +34,10 @@ * +---------------------------------------+ <== dynamic * | padding | * +---------------------------------------+ <== 4K aligned + * | golden contexts | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | private data | * +---------------------------------------+ * | padding | @@ -52,6 +57,11 @@ static u32 guc_ads_regset_size(struct intel_guc *guc) return guc->ads_regset_size; } +static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->ads_golden_ctxt_size); +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); @@ -62,12 +72,23 @@ static u32 
guc_ads_regset_offset(struct intel_guc *guc) return offsetof(struct __guc_ads_blob, regset); } -static u32 guc_ads_private_data_offset(struct intel_guc *guc) +static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) { u32 offset; offset = guc_ads_regset_offset(guc) + guc_ads_regset_size(guc); + + return PAGE_ALIGN(offset); +} + +static u32 guc_ads_private_data_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_golden_ctxt_offset(guc) + + guc_ads_golden_ctxt_size(guc); + return PAGE_ALIGN(offset); } @@ -319,53 +340,166 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc, GEM_BUG_ON(temp_set.size); } -/* - * The first 80 dwords of the register state context, containing the - * execlists and ppgtt registers. - */ -#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +static void fill_engine_enable_masks(struct intel_gt *gt, + struct guc_gt_system_info *info) +{ + info->engine_enabled_masks[GUC_RENDER_CLASS] = 1; + info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1; + info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); + info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); +} -static void __guc_ads_init(struct intel_guc *guc) +static int guc_prep_golden_context(struct intel_guc *guc, + struct __guc_ads_blob *blob) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_i915_private *i915 = gt->i915; + u32 addr_ggtt, offset; + u32 total_size = 0, alloc_size, real_size; + u8 engine_class, guc_class; + struct guc_gt_system_info *info, local_info; + + /* + * Reserve the memory for the golden contexts and point GuC at it but + * leave it empty for now. The context data will be filled in later + * once there is something available to put there. + * + * Note that the HWSP and ring context are not included. + * + * Note also that the storage must be pinned in the GGTT, so that the + * address won't change after GuC has been told where to find it. The + * GuC will also validate that the LRC base + size fall within the + * allowed GGTT range. 
+ */ + if (blob) { + offset = guc_ads_golden_ctxt_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + info = &blob->system_info; + } else { + memset(&local_info, 0, sizeof(local_info)); + info = &local_info; + fill_engine_enable_masks(gt, info); + } + + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { + if (engine_class == OTHER_CLASS) + continue; + + guc_class = engine_class_to_guc_class(engine_class); + + if (!info->engine_enabled_masks[guc_class]) + continue; + + real_size = intel_engine_context_size(gt, engine_class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + if (!blob) + continue; + + blob->ads.eng_state_size[guc_class] = real_size; + blob->ads.golden_context_lrca[guc_class] = addr_ggtt; + addr_ggtt += alloc_size; + } + + if (!blob) + return total_size; + + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); + return total_size; +} + +static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_class) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) { + if (engine->class != engine_class) + continue; + + if (!engine->default_state) + continue; + + return engine; + } + + return NULL; +} + +static void guc_init_golden_context(struct intel_guc *guc) +{ struct __guc_ads_blob *blob = guc->ads_blob; - const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; - u32 base; + struct intel_engine_cs *engine; + struct intel_gt *gt = guc_to_gt(guc); + u32 addr_ggtt, offset; + u32 total_size = 0, alloc_size, real_size; u8 engine_class, guc_class; + u8 *ptr; - /* GuC scheduling policies */ - guc_policies_init(guc, &blob->policies); + /* Skip execlist and PPGTT registers + HWSP */ + const u32 lr_hw_context_size = 80 * sizeof(u32); + const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + + lr_hw_context_size; + + if (!intel_uc_uses_guc_submission(>->uc)) + return; + + GEM_BUG_ON(!blob); /* - * GuC expects a per-engine-class context image and size - * (minus hwsp and ring context). The context image will be - * used to reinitialize engines after a reset. It must exist - * and be pinned in the GGTT, so that the address won't change after - * we have told GuC where to find it. The context size will be used - * to validate that the LRC base + size fall within allowed GGTT. + * Go back and fill in the golden context data now that it is + * available. */ + offset = guc_ads_golden_ctxt_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + ptr = ((u8 *)blob) + offset; + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { if (engine_class == OTHER_CLASS) continue; guc_class = engine_class_to_guc_class(engine_class); - /* - * TODO: Set context pointer to default state to allow - * GuC to re-init guilty contexts after internal reset. 
- */ - blob->ads.golden_context_lrca[guc_class] = 0; - blob->ads.eng_state_size[guc_class] = - intel_engine_context_size(gt, engine_class) - - skipped_size; + if (!blob->system_info.engine_enabled_masks[guc_class]) + continue; + + real_size = intel_engine_context_size(gt, engine_class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + engine = find_engine_state(gt, engine_class); + if (!engine) { + drm_err(>->i915->drm, "No engine state recorded for class %d!\n", + engine_class); + blob->ads.eng_state_size[guc_class] = 0; + blob->ads.golden_context_lrca[guc_class] = 0; + continue; + } + + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); + addr_ggtt += alloc_size; + + shmem_read(engine->default_state, skip_size, ptr + skip_size, + real_size - skip_size); + ptr += alloc_size; } + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); +} + +static void __guc_ads_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + struct __guc_ads_blob *blob = guc->ads_blob; + u32 base; + + /* GuC scheduling policies */ + guc_policies_init(guc, &blob->policies); + /* System info */ - blob->system_info.engine_enabled_masks[GUC_RENDER_CLASS] = 1; - blob->system_info.engine_enabled_masks[GUC_BLITTER_CLASS] = 1; - blob->system_info.engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); - blob->system_info.engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); + fill_engine_enable_masks(gt, &blob->system_info); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = hweight8(gt->info.sseu.slice_mask); @@ -380,6 +514,9 @@ static void __guc_ads_init(struct intel_guc *guc) GEN12_DOORBELLS_PER_SQIDI) + 1; } + /* Golden contexts for re-initialising after a watchdog reset */ + guc_prep_golden_context(guc, blob); + guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); base = intel_guc_ggtt_offset(guc, guc->ads_vma); @@ -417,6 +554,13 @@ int intel_guc_ads_create(struct intel_guc *guc) return ret; guc->ads_regset_size = ret; + /* Likewise the golden contexts: */ + ret = guc_prep_golden_context(guc, NULL); + if (ret < 0) + return ret; + guc->ads_golden_ctxt_size = ret; + + /* Now the total size can be determined: */ size = guc_ads_blob_size(guc); ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, @@ -429,6 +573,18 @@ int intel_guc_ads_create(struct intel_guc *guc) return 0; } +void intel_guc_ads_init_late(struct intel_guc *guc) +{ + /* + * The golden context setup requires the saved engine state from + * __engines_record_defaults(). However, that requires engines to be + * operational which means the ADS must already have been configured. + * Fortunately, the golden context state is not needed until a hang + * occurs, so it can be filled in during this late init phase. 
+ */ + guc_init_golden_context(guc); +} + void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index bdcb339a5321..3d85051d57e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -11,6 +11,7 @@ struct drm_printer; int intel_guc_ads_create(struct intel_guc *guc); void intel_guc_ads_destroy(struct intel_guc *guc); +void intel_guc_ads_init_late(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); void intel_guc_ads_print_policy_info(struct intel_guc *guc, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 77c1fe2ed883..7a69c3c027e9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -120,6 +120,11 @@ void intel_uc_init_early(struct intel_uc *uc) uc->ops = &uc_ops_off; } +void intel_uc_init_late(struct intel_uc *uc) +{ + intel_guc_init_late(&uc->guc); +} + void intel_uc_driver_late_release(struct intel_uc *uc) { } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 91315e3f1c58..e2da2b6e76e1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -35,6 +35,7 @@ struct intel_uc { }; void intel_uc_init_early(struct intel_uc *uc); +void intel_uc_init_late(struct intel_uc *uc); void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); -- cgit v1.2.3 From ae8ac10dfd2aa9e20cb5baea3c036d8535649113 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:39 -0700 Subject: drm/i915/guc: Implement banned contexts for GuC submission When using GuC submission, if a context gets banned disable scheduling and mark all inflight requests as complete. 
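For orientation, the core of the GuC ban flow can be sketched as below. This is illustrative only: the real guc_context_ban() in the diff also handles locking, pin counts, runtime PM and the cases where submission is already disabled or a schedule-disable is already pending.

static void sketch_guc_context_ban(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);
	u16 guc_id;

	set_context_banned(ce);
	guc_id = prep_context_pending_disable(ce);

	/* Minimum preemption timeout so the banned context leaves the HW ASAP */
	__guc_context_set_preemption_timeout(guc, guc_id, 1 /* us */);
	__guc_context_sched_disable(guc, ce, guc_id);

	/* On the schedule-disable completion from GuC:
	 * guc_cancel_context_requests(ce);
	 */
}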
Cc: John Harrison Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-25-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_context.h | 13 ++ drivers/gpu/drm/i915/gt/intel_context_types.h | 2 + drivers/gpu/drm/i915/gt/intel_reset.c | 32 ++--- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 20 +++ drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 151 ++++++++++++++++++++-- drivers/gpu/drm/i915/i915_trace.h | 10 ++ 8 files changed, 195 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e3df01a201d7..05c3ee191710 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1084,7 +1084,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - if (ban && intel_context_set_banned(ce)) + if (ban && intel_context_ban(ce, NULL)) continue; /* diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 2ed9bf5f91a5..814d9277096a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -16,6 +16,7 @@ #include "intel_engine_types.h" #include "intel_ring_types.h" #include "intel_timeline_types.h" +#include "i915_trace.h" #define CE_TRACE(ce, fmt, ...) do { \ const struct intel_context *ce__ = (ce); \ @@ -243,6 +244,18 @@ static inline bool intel_context_set_banned(struct intel_context *ce) return test_and_set_bit(CONTEXT_BANNED, &ce->flags); } +static inline bool intel_context_ban(struct intel_context *ce, + struct i915_request *rq) +{ + bool ret = intel_context_set_banned(ce); + + trace_intel_context_ban(ce); + if (ce->ops->ban) + ce->ops->ban(ce, rq); + + return ret; +} + static inline bool intel_context_force_single_submission(const struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 035108c10b2c..57c19ee3e313 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -35,6 +35,8 @@ struct intel_context_ops { int (*alloc)(struct intel_context *ce); + void (*ban)(struct intel_context *ce, struct i915_request *rq); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4d281bc8a38c..91200c43951f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -22,7 +22,6 @@ #include "intel_reset.h" #include "uc/intel_guc.h" -#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -39,21 +38,6 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) intel_uncore_rmw_fw(uncore, reg, clr, 0); } -static void skip_context(struct i915_request *rq) -{ - struct intel_context *hung_ctx = rq->context; - - list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) { - if (!i915_request_is_active(rq)) - return; - - if (rq->context == hung_ctx) { - i915_request_set_error_once(rq, -EIO); - __i915_request_skip(rq); - } - } -} - static 
void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { struct drm_i915_file_private *file_priv = ctx->file_priv; @@ -88,10 +72,8 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; - if (intel_context_is_closed(rq->context)) { - intel_context_set_banned(rq->context); + if (intel_context_is_closed(rq->context)) return true; - } rcu_read_lock(); ctx = rcu_dereference(rq->context->gem_context); @@ -123,11 +105,9 @@ static bool mark_guilty(struct i915_request *rq) banned = !i915_gem_context_is_recoverable(ctx); if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) banned = true; - if (banned) { + if (banned) drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - intel_context_set_banned(rq->context); - } client_mark_guilty(ctx, banned); @@ -149,6 +129,8 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { + bool banned = false; + RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); GEM_BUG_ON(__i915_request_is_complete(rq)); @@ -156,13 +138,15 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) if (guilty) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); - if (mark_guilty(rq) && !intel_engine_uses_guc(rq->engine)) - skip_context(rq); + banned = mark_guilty(rq); } else { i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); } rcu_read_unlock(); + + if (banned) + intel_context_ban(rq->context, rq); } static bool i915_in_reset(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a5404c7b600f..05bb9f449df1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -586,9 +586,29 @@ static void ring_context_reset(struct intel_context *ce) clear_bit(CONTEXT_VALID_BIT, &ce->flags); } +static void ring_context_ban(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine; + + if (!rq || !i915_request_is_active(rq)) + return; + + engine = rq->engine; + lockdep_assert_held(&engine->sched_engine->lock); + list_for_each_entry_continue(rq, &engine->sched_engine->requests, + sched.link) + if (rq->context == ce) { + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); + } +} + static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc, + .ban = ring_context_ban, + .pre_pin = ring_context_pre_pin, .pin = ring_context_pin, .unpin = ring_context_unpin, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 1875303c3bca..8ab70a2223b0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -281,6 +281,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine); int intel_guc_global_policies_update(struct intel_guc *guc); +void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq); + void intel_guc_submission_reset_prepare(struct intel_guc *guc); void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); void intel_guc_submission_reset_finish(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index cea3e3073a71..ad9a38a861df 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -125,6 +125,7 @@ static inline void 
clr_context_pending_enable(struct intel_context *ce) #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) #define SCHED_STATE_DESTROYED BIT(1) #define SCHED_STATE_PENDING_DISABLE BIT(2) +#define SCHED_STATE_BANNED BIT(3) static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ @@ -185,6 +186,23 @@ static inline void clr_context_pending_disable(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; } +static inline bool context_banned(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_BANNED; +} + +static inline void set_context_banned(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_BANNED; +} + +static inline void clr_context_banned(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id == GUC_INVALID_LRC_ID; @@ -357,13 +375,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - int err; + int err = 0; struct intel_context *ce = rq->context; u32 action[3]; int len = 0; u32 g2h_len_dw = 0; bool enabled; + /* + * Corner case where requests were sitting in the priority list or a + * request resubmitted after the context was banned. + */ + if (unlikely(intel_context_is_banned(ce))) { + i915_request_put(i915_request_mark_eio(rq)); + intel_engine_signal_breadcrumbs(ce->engine); + goto out; + } + GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); @@ -399,6 +427,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) clr_context_pending_enable(ce); intel_context_put(ce); } + if (likely(!err)) + trace_i915_request_guc_submit(rq); out: return err; @@ -463,7 +493,6 @@ resubmit: guc->stalled_request = last; return false; } - trace_i915_request_guc_submit(last); } guc->stalled_request = NULL; @@ -502,12 +531,13 @@ static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) static void __guc_context_destroy(struct intel_context *ce); static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); static void guc_signal_context_fence(struct intel_context *ce); +static void guc_cancel_context_requests(struct intel_context *ce); static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) { struct intel_context *ce; unsigned long index, flags; - bool pending_disable, pending_enable, deregister, destroyed; + bool pending_disable, pending_enable, deregister, destroyed, banned; xa_for_each(&guc->context_lookup, index, ce) { /* Flush context */ @@ -525,6 +555,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) pending_enable = context_pending_enable(ce); pending_disable = context_pending_disable(ce); deregister = context_wait_for_deregister_to_register(ce); + banned = context_banned(ce); init_sched_state(ce); if (pending_enable || destroyed || deregister) { @@ -542,6 +573,10 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) /* Not mutualy exclusive with above if statement. 
*/ if (pending_disable) { guc_signal_context_fence(ce); + if (banned) { + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } intel_context_sched_disable_unpin(ce); atomic_dec(&guc->outstanding_submission_g2h); intel_context_put(ce); @@ -661,6 +696,9 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) { struct intel_engine_cs *engine = __context_to_physical_engine(ce); + if (intel_context_is_banned(ce)) + return; + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* @@ -731,6 +769,8 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) struct i915_request *rq; u32 head; + intel_context_get(ce); + /* * GuC will implicitly mark the context as non-schedulable * when it sends the reset notification. Make sure our state @@ -756,6 +796,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) out_replay: guc_reset_state(ce, head, stalled); __unwind_incomplete_requests(ce); + intel_context_put(ce); } void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) @@ -940,8 +981,6 @@ static int guc_bypass_tasklet_submit(struct intel_guc *guc, ret = guc_add_request(guc, rq); if (ret == -EBUSY) guc->stalled_request = rq; - else - trace_i915_request_guc_submit(rq); if (unlikely(ret == -EPIPE)) disable_submission(guc); @@ -1344,13 +1383,77 @@ static u16 prep_context_pending_disable(struct intel_context *ce) return ce->guc_id; } +static void __guc_context_set_preemption_timeout(struct intel_guc *guc, + u16 guc_id, + u32 preemption_timeout) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); +} + +static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct intel_runtime_pm *runtime_pm = + &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + unsigned long flags; + + guc_flush_submissions(guc); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + set_context_banned(ce); + + if (submission_disabled(guc) || + (!context_enabled(ce) && !context_pending_disable(ce))) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } else if (!context_pending_disable(ce)) { + u16 guc_id; + + /* + * We add +2 here as the schedule disable complete CTB handler + * calls intel_context_sched_disable_unpin (-2 to pin_count). + */ + atomic_add(2, &ce->pin_count); + + guc_id = prep_context_pending_disable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + /* + * In addition to disabling scheduling, set the preemption + * timeout to the minimum value (1 us) so the banned context + * gets kicked off the HW ASAP. 
+ */ + with_intel_runtime_pm(runtime_pm, wakeref) { + __guc_context_set_preemption_timeout(guc, guc_id, 1); + __guc_context_sched_disable(guc, ce, guc_id); + } + } else { + if (!context_guc_id_invalid(ce)) + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_set_preemption_timeout(guc, + ce->guc_id, + 1); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } +} + static void guc_context_sched_disable(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; unsigned long flags; - u16 guc_id; + struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; + u16 guc_id; + bool enabled; if (submission_disabled(guc) || context_guc_id_invalid(ce) || !lrc_desc_registered(guc, ce->guc_id)) { @@ -1364,14 +1467,22 @@ static void guc_context_sched_disable(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); /* - * We have to check if the context has been pinned again as another pin - * operation is allowed to pass this function. Checking the pin count, - * within ce->guc_state.lock, synchronizes this function with + * We have to check if the context has been disabled by another thread. + * We also have to check if the context has been pinned again as another + * pin operation is allowed to pass this function. Checking the pin + * count, within ce->guc_state.lock, synchronizes this function with * guc_request_alloc ensuring a request doesn't slip through the * 'context_pending_disable' fence. Checking within the spin lock (can't * sleep) ensures another process doesn't pin this context and generate * a request before we set the 'context_pending_disable' flag here. */ + enabled = context_enabled(ce); + if (unlikely(!enabled || submission_disabled(guc))) { + if (enabled) + clr_context_enabled(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + goto unpin; + } if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { spin_unlock_irqrestore(&ce->guc_state.lock, flags); return; @@ -1529,6 +1640,8 @@ static const struct intel_context_ops guc_context_ops = { .unpin = guc_context_unpin, .post_unpin = guc_context_post_unpin, + .ban = guc_context_ban, + .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -1722,6 +1835,8 @@ static const struct intel_context_ops virtual_guc_context_ops = { .unpin = guc_context_unpin, .post_unpin = guc_context_post_unpin, + .ban = guc_context_ban, + .enter = guc_virtual_context_enter, .exit = guc_virtual_context_exit, @@ -2164,6 +2279,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, if (context_pending_enable(ce)) { clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { + bool banned; + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -2174,9 +2291,16 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, intel_context_sched_disable_unpin(ce); spin_lock_irqsave(&ce->guc_state.lock, flags); + banned = context_banned(ce); + clr_context_banned(ce); clr_context_pending_disable(ce); __guc_signal_context_fence(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + if (banned) { + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } } decr_outstanding_submission_g2h(guc); @@ -2211,8 +2335,11 @@ static void guc_handle_context_reset(struct intel_guc *guc, struct intel_context *ce) { trace_intel_context_reset(ce); - capture_error_state(guc, ce); - 
guc_context_replay(ce); + + if (likely(!intel_context_is_banned(ce))) { + capture_error_state(guc, ce); + guc_context_replay(ce); + } } int intel_guc_context_reset_process_msg(struct intel_guc *guc, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 3f43d904f043..9613a7c19661 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -925,6 +925,11 @@ DEFINE_EVENT(intel_context, intel_context_reset, TP_ARGS(ce) ); +DEFINE_EVENT(intel_context, intel_context_ban, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + DEFINE_EVENT(intel_context, intel_context_register, TP_PROTO(struct intel_context *ce), TP_ARGS(ce) @@ -1017,6 +1022,11 @@ trace_intel_context_reset(struct intel_context *ce) { } +static inline void +trace_intel_context_ban(struct intel_context *ce) +{ +} + static inline void trace_intel_context_register(struct intel_context *ce) { -- cgit v1.2.3 From 62eaf0ae217d45e917fd9ca6296205117a69b6d0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:40 -0700 Subject: drm/i915/guc: Support request cancellation This adds GuC backend support for i915_request_cancel(), which in turn makes CONFIG_DRM_I915_REQUEST_TIMEOUT work. This implementation makes use of fence while there are likely simplier options. A fence was chosen because of another feature coming soon which requires a user to block on a context until scheduling is disabled. In that case we return the fence to the user and the user can wait on that fence. v2: (Daniele) - A comment about locking the blocked incr / decr - A comments about the use of the fence - Update commit message explaining why fence - Delete redundant check blocked count in unblock function - Ring buffer implementation - Comment about blocked in submission path - Shorter rpm path v3: (Checkpatch) - Fix typos in commit message (Daniel) - Rework to simplier locking structure in guc_context_block / unblock Signed-off-by: Matthew Brost Cc: Tvrtko Ursulin Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-26-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 13 ++ drivers/gpu/drm/i915/gt/intel_context.h | 7 + drivers/gpu/drm/i915/gt/intel_context_types.h | 9 +- .../gpu/drm/i915/gt/intel_execlists_submission.c | 18 ++ drivers/gpu/drm/i915/gt/intel_ring_submission.c | 16 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 188 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_request.c | 14 +- 7 files changed, 251 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 237b70e98744..477c42d7d693 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -366,6 +366,12 @@ static int __intel_context_active(struct i915_active *active) return 0; } +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, + enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -399,6 +405,13 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->guc_id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id_link); + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a pending schedule disable outstanding. 
+ */ + i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_blocked); + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 814d9277096a..876bdb08303c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -70,6 +70,13 @@ intel_context_is_pinned(struct intel_context *ce) return atomic_read(&ce->pin_count); } +static inline void intel_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + GEM_BUG_ON(!ce->ops->cancel_request); + return ce->ops->cancel_request(ce, rq); +} + /** * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status * @ce - the context diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 57c19ee3e313..a5bc876face7 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,6 +13,7 @@ #include #include "i915_active_types.h" +#include "i915_sw_fence.h" #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" @@ -42,6 +43,9 @@ struct intel_context_ops { void (*unpin)(struct intel_context *ce); void (*post_unpin)(struct intel_context *ce); + void (*cancel_request)(struct intel_context *ce, + struct i915_request *rq); + void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); @@ -156,7 +160,7 @@ struct intel_context { * sched_state: scheduling state of this context using GuC * submission */ - u8 sched_state; + u16 sched_state; /* * fences: maintains of list of requests that have a submit * fence related to GuC submission @@ -184,6 +188,9 @@ struct intel_context { * GuC ID link - in list when unpinned but guc_id still valid in GuC */ struct list_head guc_id_link; + + /* GuC context blocked fence */ + struct i915_sw_fence guc_blocked; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index b4a876736074..de5f9c86b9a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -114,6 +114,7 @@ #include "gen8_engine_cs.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_stats.h" #include "intel_execlists_submission.h" @@ -2587,11 +2588,26 @@ static int execlists_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void execlists_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request cancellation by %s", + current->comm); +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, .alloc = execlists_context_alloc, + .cancel_request = execlists_context_cancel_request, + .pre_pin = execlists_context_pre_pin, .pin = execlists_context_pin, .unpin = lrc_unpin, @@ -3608,6 +3624,8 @@ static const struct intel_context_ops virtual_context_ops = { .alloc = virtual_context_alloc, + .cancel_request = execlists_context_cancel_request, + .pre_pin = virtual_context_pre_pin, .pin = virtual_context_pin, .unpin = lrc_unpin, 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 05bb9f449df1..2958e2fae380 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -16,6 +16,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_engine_heartbeat.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -604,9 +605,24 @@ static void ring_context_ban(struct intel_context *ce, } } +static void ring_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request cancellation by %s", + current->comm); +} + static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc, + .cancel_request = ring_context_cancel_request, + .ban = ring_context_ban, .pre_pin = ring_context_pre_pin, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ad9a38a861df..ee4f1f996efa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -126,6 +126,9 @@ static inline void clr_context_pending_enable(struct intel_context *ce) #define SCHED_STATE_DESTROYED BIT(1) #define SCHED_STATE_PENDING_DISABLE BIT(2) #define SCHED_STATE_BANNED BIT(3) +#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) +#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ @@ -203,6 +206,32 @@ static inline void clr_context_banned(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; } +static inline u32 context_blocked(struct intel_context *ce) +{ + return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> + SCHED_STATE_BLOCKED_SHIFT; +} + +static inline void incr_context_blocked(struct intel_context *ce) +{ + lockdep_assert_held(&ce->engine->sched_engine->lock); + lockdep_assert_held(&ce->guc_state.lock); + + ce->guc_state.sched_state += SCHED_STATE_BLOCKED; + + GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ +} + +static inline void decr_context_blocked(struct intel_context *ce) +{ + lockdep_assert_held(&ce->engine->sched_engine->lock); + lockdep_assert_held(&ce->guc_state.lock); + + GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ + + ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id == GUC_INVALID_LRC_ID; @@ -404,6 +433,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(err)) goto out; } + + /* + * The request / context will be run on the hardware when scheduling + * gets enabled in the unblock. 
+ */ + if (unlikely(context_blocked(ce))) + goto out; + enabled = context_enabled(ce); if (!enabled) { @@ -532,6 +569,7 @@ static void __guc_context_destroy(struct intel_context *ce); static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); static void guc_signal_context_fence(struct intel_context *ce); static void guc_cancel_context_requests(struct intel_context *ce); +static void guc_blocked_fence_complete(struct intel_context *ce); static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) { @@ -579,6 +617,10 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) } intel_context_sched_disable_unpin(ce); atomic_dec(&guc->outstanding_submission_g2h); + spin_lock_irqsave(&ce->guc_state.lock, flags); + guc_blocked_fence_complete(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + intel_context_put(ce); } } @@ -1354,6 +1396,21 @@ static void guc_context_post_unpin(struct intel_context *ce) lrc_post_unpin(ce); } +static void __guc_context_sched_enable(struct intel_guc *guc, + struct intel_context *ce) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, + ce->guc_id, + GUC_CONTEXT_ENABLE + }; + + trace_intel_context_sched_enable(ce); + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); +} + static void __guc_context_sched_disable(struct intel_guc *guc, struct intel_context *ce, u16 guc_id) @@ -1372,17 +1429,143 @@ static void __guc_context_sched_disable(struct intel_guc *guc, G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); } +static void guc_blocked_fence_complete(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + if (!i915_sw_fence_done(&ce->guc_blocked)) + i915_sw_fence_complete(&ce->guc_blocked); +} + +static void guc_blocked_fence_reinit(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + + /* + * This fence is always complete unless a pending schedule disable is + * outstanding. We arm the fence here and complete it when we receive + * the pending schedule disable complete message. + */ + i915_sw_fence_fini(&ce->guc_blocked); + i915_sw_fence_reinit(&ce->guc_blocked); + i915_sw_fence_await(&ce->guc_blocked); + i915_sw_fence_commit(&ce->guc_blocked); +} + static u16 prep_context_pending_disable(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); set_context_pending_disable(ce); clr_context_enabled(ce); + guc_blocked_fence_reinit(ce); intel_context_get(ce); return ce->guc_id; } +static struct i915_sw_fence *guc_context_block(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + unsigned long flags; + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + intel_wakeref_t wakeref; + u16 guc_id; + bool enabled; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + /* + * Sync with submission path, increment before below changes to context + * state. + */ + spin_lock(&sched_engine->lock); + incr_context_blocked(ce); + spin_unlock(&sched_engine->lock); + + enabled = context_enabled(ce); + if (unlikely(!enabled || submission_disabled(guc))) { + if (enabled) + clr_context_enabled(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return &ce->guc_blocked; + } + + /* + * We add +2 here as the schedule disable complete CTB handler calls + * intel_context_sched_disable_unpin (-2 to pin_count). 
+ */ + atomic_add(2, &ce->pin_count); + + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return &ce->guc_blocked; +} + +static void guc_context_unblock(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + unsigned long flags; + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + intel_wakeref_t wakeref; + bool enable; + + GEM_BUG_ON(context_enabled(ce)); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + if (unlikely(submission_disabled(guc) || + !intel_context_is_pinned(ce) || + context_pending_disable(ce) || + context_blocked(ce) > 1)) { + enable = false; + } else { + enable = true; + set_context_pending_enable(ce); + set_context_enabled(ce); + intel_context_get(ce); + } + + /* + * Sync with submission path, decrement after above changes to context + * state. + */ + spin_lock(&sched_engine->lock); + decr_context_blocked(ce); + spin_unlock(&sched_engine->lock); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + if (enable) { + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_enable(guc, ce); + } +} + +static void guc_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + if (i915_sw_fence_signaled(&rq->submit)) { + struct i915_sw_fence *fence = guc_context_block(ce); + + i915_sw_fence_wait(fence); + if (!i915_request_completed(rq)) { + __i915_request_skip(rq); + guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), + true); + } + guc_context_unblock(ce); + } +} + static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) @@ -1642,6 +1825,8 @@ static const struct intel_context_ops guc_context_ops = { .ban = guc_context_ban, + .cancel_request = guc_context_cancel_request, + .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -1837,6 +2022,8 @@ static const struct intel_context_ops virtual_guc_context_ops = { .ban = guc_context_ban, + .cancel_request = guc_context_cancel_request, + .enter = guc_virtual_context_enter, .exit = guc_virtual_context_exit, @@ -2295,6 +2482,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, clr_context_banned(ce); clr_context_pending_disable(ce); __guc_signal_context_fence(ce); + guc_blocked_fence_complete(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (banned) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 28f38b02a5d2..541a20371502 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -710,18 +710,6 @@ void i915_request_unsubmit(struct i915_request *request) spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } -static void __cancel_request(struct i915_request *rq) -{ - struct intel_engine_cs *engine = NULL; - - i915_request_active_engine(rq, &engine); - - if (engine && intel_engine_pulse(engine)) - intel_gt_handle_error(engine->gt, engine->mask, 0, - "request cancellation by %s", - current->comm); -} - void i915_request_cancel(struct i915_request *rq, int error) { if (!i915_request_set_error_once(rq, error)) @@ -729,7 +717,7 @@ void i915_request_cancel(struct i915_request *rq, int error) set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); - __cancel_request(rq); + intel_context_cancel_request(rq->context, rq); } static int __i915_sw_fence_call -- cgit v1.2.3 From 
3f5dff6c18aa0473158686f363184a1bdae0116b Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:41 -0700 Subject: drm/i915/selftest: Better error reporting from hangcheck selftest There are many ways in which the hangcheck selftest can fail. Very few of them actually printed an error message to say what happened. So, fill in the missing messages. Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-27-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 93 ++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7aea10aa1fb4..f93ba4076b2b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -378,6 +378,7 @@ static int igt_reset_nop(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); + pr_err("[%s] Create context failed: %d!\n", engine->name, err); break; } @@ -387,6 +388,8 @@ static int igt_reset_nop(void *arg) rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create request failed: %d!\n", + engine->name, err); break; } @@ -401,24 +404,31 @@ static int igt_reset_nop(void *arg) igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) { + pr_err("[%s] GT is wedged!\n", engine->name); err = -EIO; break; } if (i915_reset_count(global) != reset_count + ++count) { - pr_err("Full GPU reset not recorded!\n"); + pr_err("[%s] Reset not recorded: %d vs %d + %d!\n", + engine->name, i915_reset_count(global), reset_count, count); err = -EINVAL; break; } err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - if (igt_flush_test(gt->i915)) + if (igt_flush_test(gt->i915)) { + pr_err("Post flush failed: %d!\n", err); err = -EIO; + } + return err; } @@ -441,8 +451,10 @@ static int igt_reset_nop_engine(void *arg) int err; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + pr_err("[%s] Create context failed: %d!\n", engine->name, err); return PTR_ERR(ce); + } reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); @@ -550,8 +562,10 @@ static int igt_reset_fail_engine(void *arg) int err; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + pr_err("[%s] Create context failed: %d!\n", engine->name, err); return PTR_ERR(ce); + } st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); @@ -711,6 +725,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", + engine->name, err); break; } @@ -765,12 +781,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) break; err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } - if (intel_gt_is_wedged(gt)) + if (intel_gt_is_wedged(gt)) { + pr_err("GT is wedged!\n"); err = -EIO; + } if (active) hang_fini(&h); @@ -837,6 +857,7 @@ static int active_engine(void *data) ce[count] = 
intel_context_create(engine); if (IS_ERR(ce[count])) { err = PTR_ERR(ce[count]); + pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); while (--count) intel_context_put(ce[count]); return err; @@ -852,6 +873,7 @@ static int active_engine(void *data) new = intel_context_create_request(ce[idx]); if (IS_ERR(new)) { err = PTR_ERR(new); + pr_err("[%s] Create request #%d failed: %d!\n", engine->name, idx, err); break; } @@ -867,8 +889,10 @@ static int active_engine(void *data) } err = active_request_put(old); - if (err) + if (err) { + pr_err("[%s] Request put failed: %d!\n", engine->name, err); break; + } cond_resched(); } @@ -876,6 +900,9 @@ static int active_engine(void *data) for (count = 0; count < ARRAY_SIZE(rq); count++) { int err__ = active_request_put(rq[count]); + if (err) + pr_err("[%s] Request put #%ld failed: %d!\n", engine->name, count, err); + /* Keep the first error */ if (!err) err = err__; @@ -949,6 +976,7 @@ static int __igt_reset_engines(struct intel_gt *gt, "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); goto unwind; } @@ -967,6 +995,8 @@ static int __igt_reset_engines(struct intel_gt *gt, rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", + engine->name, err); break; } @@ -998,11 +1028,10 @@ static int __igt_reset_engines(struct intel_gt *gt, if (rq) { if (rq->fence.error != -EIO) { - pr_err("i915_reset_engine(%s:%s):" - " failed to reset request %llx:%lld\n", + pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno); + rq->fence.seqno, rq->context->guc_id); i915_request_put(rq); GEM_TRACE_DUMP(); @@ -1101,8 +1130,10 @@ unwind: break; err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } if (intel_gt_is_wedged(gt)) @@ -1180,12 +1211,15 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); goto unlock; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto fini; } @@ -1310,12 +1344,15 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, /* Check that we can recover an unbind stuck on a hanging request */ err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { err = PTR_ERR(obj); + pr_err("[%s] Create object failed: %d!\n", engine->name, err); goto fini; } @@ -1330,12 +1367,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, arg.vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(arg.vma)) { err = PTR_ERR(arg.vma); + pr_err("[%s] VMA instance failed: %d!\n", engine->name, err); goto out_obj; } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto out_obj; } @@ -1347,6 +1386,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); + pr_err("[%s] VMA pin failed: %d!\n", engine->name, err); goto out_obj; } @@ -1363,8 +1403,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, 
i915_vma_lock(arg.vma); err = i915_request_await_object(rq, arg.vma->obj, flags & EXEC_OBJECT_WRITE); - if (err == 0) + if (err == 0) { err = i915_vma_move_to_active(arg.vma, rq, flags); + if (err) + pr_err("[%s] Move to active failed: %d!\n", engine->name, err); + } else { + pr_err("[%s] Request await failed: %d!\n", engine->name, err); + } + i915_vma_unlock(arg.vma); if (flags & EXEC_OBJECT_NEEDS_FENCE) @@ -1392,6 +1438,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, tsk = kthread_run(fn, &arg, "igt/evict_vma"); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); tsk = NULL; goto out_reset; } @@ -1518,6 +1565,7 @@ static int igt_reset_queue(void *arg) prev = hang_create_request(&h, engine); if (IS_ERR(prev)) { err = PTR_ERR(prev); + pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err); goto fini; } @@ -1532,6 +1580,7 @@ static int igt_reset_queue(void *arg) rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto fini; } @@ -1619,8 +1668,10 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } fini: @@ -1653,12 +1704,15 @@ static int igt_handle_error(void *arg) return 0; err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto err_fini; } @@ -1743,12 +1797,15 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, return err; err = hang_init(&h, engine->gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto out; } -- cgit v1.2.3 From 3a4bfa091c46e90f7d68d219c36a86471b170cb8 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Singh Date: Mon, 26 Jul 2021 17:23:42 -0700 Subject: drm/i915/selftest: Fix workarounds selftest for GuC submission When GuC submission is enabled, the GuC controls engine resets. Rather than explicitly triggering a reset, the driver must submit a hanging context to GuC and wait for the reset to occur. 
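As a rough, illustrative sketch (not the patch code), the flow under GuC submission looks like this. The wrapper name sketch_guc_reset_flow() is invented for illustration, the helpers it calls (intel_selftest_modify_policy(), intel_selftest_wait_for_rq(), intel_selftest_restore_policy()) are the ones added by this series, and error handling is trimmed:

/* Hedged sketch only: provoke and wait for a GuC-driven engine reset
 * instead of calling intel_engine_reset() directly.
 */
static int sketch_guc_reset_flow(struct intel_engine_cs *engine,
				 struct igt_spinner *spin,
				 struct intel_context *ce)
{
	struct intel_selftest_saved_policy saved;
	struct i915_request *rq;
	int err, err2;

	/* Shrink the preemption timeout so the GuC notices the hang quickly */
	err = intel_selftest_modify_policy(engine, &saved);
	if (err)
		return err;

	rq = igt_spinner_create_request(spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto restore;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* No intel_engine_reset() here: the GuC resets the hanging context */
	err = intel_selftest_wait_for_rq(rq);

	igt_spinner_end(spin);
	i915_request_put(rq);

restore:
	err2 = intel_selftest_restore_policy(engine, &saved);
	if (err == 0)
		err = err2;
	return err;
}

The actual patch folds this into check_whitelist_across_reset() via the new do_guc_reset() no-op, so the legacy execlists path keeps calling do_engine_reset() unchanged.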
Signed-off-by: Rahul Kumar Singh Signed-off-by: John Harrison Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-28-matthew.brost@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/selftest_workarounds.c | 130 +++++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 + .../drm/i915/selftests/intel_scheduler_helpers.c | 75 ++++++++++++ .../drm/i915/selftests/intel_scheduler_helpers.h | 27 +++++ 6 files changed, 203 insertions(+), 34 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 10b3bb6207ba..ab7679957623 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -280,6 +280,7 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ gem/selftests/i915_gem_client_blt.o \ gem/selftests/igt_gem_utils.o \ + selftests/intel_scheduler_helpers.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_atomic.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 260cce15cb62..ed91bcff20eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -443,6 +443,7 @@ struct intel_engine_cs { #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) unsigned int flags; /* diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 7a38ce40feb2..ba7ee69414d5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -12,6 +12,7 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" #include "selftests/mock_drm.h" #include "gem/selftests/igt_gem_utils.h" @@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine) return intel_engine_reset(engine, "live_workarounds"); } +static int do_guc_reset(struct intel_engine_cs *engine) +{ + /* Currently a no-op as the reset is handled by GuC */ + return 0; +} + static int switch_to_scratch_context(struct intel_engine_cs *engine, - struct igt_spinner *spin) + struct igt_spinner *spin, + struct i915_request **rq) { struct intel_context *ce; - struct i915_request *rq; int err = 0; ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + *rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - if (IS_ERR(rq)) { + if (IS_ERR(*rq)) { spin = NULL; - err = PTR_ERR(rq); + err = PTR_ERR(*rq); goto err; } - err = request_add_spin(rq, spin); + err = request_add_spin(*rq, spin); err: if (err && spin) igt_spinner_end(spin); @@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, { struct intel_context *ce, *tmp; struct igt_spinner spin; + struct i915_request *rq; intel_wakeref_t wakeref; int err; @@ -316,13 +324,24 @@ static int 
check_whitelist_across_reset(struct intel_engine_cs *engine, goto out_spin; } - err = switch_to_scratch_context(engine, &spin); + err = switch_to_scratch_context(engine, &spin, &rq); if (err) goto out_spin; + /* Ensure the spinner hasn't aborted */ + if (i915_request_completed(rq)) { + pr_err("%s spinner failed to start\n", name); + err = -ETIMEDOUT; + goto out_spin; + } + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); + /* Ensure the reset happens and kills the engine */ + if (err == 0) + err = intel_selftest_wait_for_rq(rq); + igt_spinner_end(&spin); if (err) { @@ -787,9 +806,27 @@ static int live_reset_whitelist(void *arg) continue; if (intel_has_reset_engine(gt)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); + if (intel_engine_uses_guc(engine)) { + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved); + if (err) + goto out; + + err = check_whitelist_across_reset(engine, + do_guc_reset, + "guc"); + + err2 = intel_selftest_restore_policy(engine, &saved); + if (err == 0) + err = err2; + } else { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + } + if (err) goto out; } @@ -1235,31 +1272,40 @@ live_engine_reset_workarounds(void *arg) reference_lists_init(gt, lists); for_each_engine(engine, gt, id) { + struct intel_selftest_saved_policy saved; + bool using_guc = intel_engine_uses_guc(engine); bool ok; + int ret2; pr_info("Verifying after %s reset...\n", engine->name); + ret = intel_selftest_modify_policy(engine, &saved); + if (ret) + break; + ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - break; + goto restore; } - ok = verify_wa_lists(gt, lists, "before reset"); - if (!ok) { - ret = -ESRCH; - goto err; - } + if (!using_guc) { + ok = verify_wa_lists(gt, lists, "before reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } - ret = intel_engine_reset(engine, "live_workarounds:idle"); - if (ret) { - pr_err("%s: Reset failed while idle\n", engine->name); - goto err; - } + ret = intel_engine_reset(engine, "live_workarounds:idle"); + if (ret) { + pr_err("%s: Reset failed while idle\n", engine->name); + goto err; + } - ok = verify_wa_lists(gt, lists, "after idle reset"); - if (!ok) { - ret = -ESRCH; - goto err; + ok = verify_wa_lists(gt, lists, "after idle reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } } ret = igt_spinner_init(&spin, engine->gt); @@ -1280,25 +1326,41 @@ live_engine_reset_workarounds(void *arg) goto err; } - ret = intel_engine_reset(engine, "live_workarounds:active"); - if (ret) { - pr_err("%s: Reset failed on an active spinner\n", - engine->name); - igt_spinner_fini(&spin); - goto err; + /* Ensure the spinner hasn't aborted */ + if (i915_request_completed(rq)) { + ret = -ETIMEDOUT; + goto skip; + } + + if (!using_guc) { + ret = intel_engine_reset(engine, "live_workarounds:active"); + if (ret) { + pr_err("%s: Reset failed on an active spinner\n", + engine->name); + igt_spinner_fini(&spin); + goto err; + } } + /* Ensure the reset happens and kills the engine */ + if (ret == 0) + ret = intel_selftest_wait_for_rq(rq); + +skip: igt_spinner_end(&spin); igt_spinner_fini(&spin); ok = verify_wa_lists(gt, lists, "after busy reset"); - if (!ok) { + if (!ok) ret = -ESRCH; - goto err; - } err: intel_context_put(ce); + +restore: + ret2 = intel_selftest_restore_policy(engine, &saved); + if (ret == 0) + ret = ret2; if (ret) break; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ee4f1f996efa..3ff42d6e934f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1252,6 +1252,9 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, { desc->policy_flags = 0; + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; + /* NB: For both of these, zero means disabled. */ desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c new file mode 100644 index 000000000000..5cdee1378e98 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +//#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" +#include "i915_drv.h" +#include "i915_selftest.h" + +#include "selftests/intel_scheduler_helpers.h" + +#define REDUCED_TIMESLICE 5 +#define REDUCED_PREEMPT 10 +#define WAIT_FOR_RESET_TIME 1000 + +int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved) + +{ + int err; + + saved->reset = engine->i915->params.reset; + saved->flags = engine->flags; + saved->timeslice = engine->props.timeslice_duration_ms; + saved->preempt_timeout = engine->props.preempt_timeout_ms; + + /* + * Enable force pre-emption on time slice expiration + * together with engine reset on pre-emption timeout. + * This is required to make the GuC notice and reset + * the single hanging context. + * Also, reduce the preemption timeout to something + * small to speed the test up. 
+ */ + engine->i915->params.reset = 2; + engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION; + engine->props.timeslice_duration_ms = REDUCED_TIMESLICE; + engine->props.preempt_timeout_ms = REDUCED_PREEMPT; + + if (!intel_engine_uses_guc(engine)) + return 0; + + err = intel_guc_global_policies_update(&engine->gt->uc.guc); + if (err) + intel_selftest_restore_policy(engine, saved); + + return err; +} + +int intel_selftest_restore_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved) +{ + /* Restore the original policies */ + engine->i915->params.reset = saved->reset; + engine->flags = saved->flags; + engine->props.timeslice_duration_ms = saved->timeslice; + engine->props.preempt_timeout_ms = saved->preempt_timeout; + + if (!intel_engine_uses_guc(engine)) + return 0; + + return intel_guc_global_policies_update(&engine->gt->uc.guc); +} + +int intel_selftest_wait_for_rq(struct i915_request *rq) +{ + long ret; + + ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME); + if (ret < 0) + return ret; + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h new file mode 100644 index 000000000000..79605b14bc33 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ +#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ + +#include <linux/types.h> + +struct i915_request; +struct intel_engine_cs; + +struct intel_selftest_saved_policy { + u32 flags; + u32 reset; + u64 timeslice; + u64 preempt_timeout; +}; + +int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved); +int intel_selftest_restore_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved); +int intel_selftest_wait_for_rq(struct i915_request *rq); + +#endif -- cgit v1.2.3 From 064a1f35bf1956130c84a4280a7ac75136fa2789 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Singh Date: Mon, 26 Jul 2021 17:23:43 -0700 Subject: drm/i915/selftest: Fix MOCS selftest for GuC submission When GuC submission is enabled, the GuC controls engine resets. Rather than explicitly triggering a reset, the driver must submit a hanging context to GuC and wait for the reset to occur.
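In practice that means each per-engine MOCS run below is bracketed by intel_selftest_modify_policy() and intel_selftest_restore_policy(), with the restore result folded into the test result so a failed restore is not lost. A minimal, illustrative sketch of that bracket (the wrapper name and the test callback are invented for illustration, not patch code):

/* Sketch only: the modify/restore policy bracket applied around each engine. */
static int sketch_with_guc_policy(struct intel_engine_cs *engine,
				  int (*test)(struct intel_engine_cs *engine))
{
	struct intel_selftest_saved_policy saved;
	int err, err2;

	err = intel_selftest_modify_policy(engine, &saved);
	if (err)
		return err;

	err = test(engine);

	/* Always restore, but keep the first error */
	err2 = intel_selftest_restore_policy(engine, &saved);
	if (err == 0)
		err = err2;

	return err;
}

The diff below applies exactly this shape inside live_mocs_reset(), skipping the direct intel_engine_reset() calls when using_guc is set.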
Signed-off-by: Rahul Kumar Singh Signed-off-by: John Harrison Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-29-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/selftest_mocs.c | 49 +++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 8763bbeca0f7..b7314739ee40 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -10,6 +10,7 @@ #include "gem/selftests/mock_context.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" struct live_mocs { struct drm_i915_mocs_table table; @@ -318,7 +319,8 @@ static int live_mocs_clean(void *arg) } static int active_engine_reset(struct intel_context *ce, - const char *reason) + const char *reason, + bool using_guc) { struct igt_spinner spin; struct i915_request *rq; @@ -335,9 +337,13 @@ static int active_engine_reset(struct intel_context *ce, } err = request_add_spin(rq, &spin); - if (err == 0) + if (err == 0 && !using_guc) err = intel_engine_reset(ce->engine, reason); + /* Ensure the reset happens and kills the engine */ + if (err == 0) + err = intel_selftest_wait_for_rq(rq); + igt_spinner_end(&spin); igt_spinner_fini(&spin); @@ -345,21 +351,23 @@ static int active_engine_reset(struct intel_context *ce, } static int __live_mocs_reset(struct live_mocs *mocs, - struct intel_context *ce) + struct intel_context *ce, bool using_guc) { struct intel_gt *gt = ce->engine->gt; int err; if (intel_has_reset_engine(gt)) { - err = intel_engine_reset(ce->engine, "mocs"); - if (err) - return err; - - err = check_mocs_engine(mocs, ce); - if (err) - return err; + if (!using_guc) { + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } - err = active_engine_reset(ce, "mocs"); + err = active_engine_reset(ce, "mocs", using_guc); if (err) return err; @@ -395,19 +403,32 @@ static int live_mocs_reset(void *arg) igt_global_reset_lock(gt); for_each_engine(engine, gt, id) { + bool using_guc = intel_engine_uses_guc(engine); + struct intel_selftest_saved_policy saved; struct intel_context *ce; + int err2; + + err = intel_selftest_modify_policy(engine, &saved); + if (err) + break; ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); - break; + goto restore; } intel_engine_pm_get(engine); - err = __live_mocs_reset(&mocs, ce); - intel_engine_pm_put(engine); + err = __live_mocs_reset(&mocs, ce, using_guc); + + intel_engine_pm_put(engine); intel_context_put(ce); + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err == 0) + err = err2; if (err) break; } -- cgit v1.2.3 From 716c61c87556234570827b1d287f20691271a0b6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:44 -0700 Subject: drm/i915/selftest: Increase some timeouts in live_requests Requests may take slightly longer with GuC submission, let's increase the timeouts in live_requests. 
Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-30-matthew.brost@intel.com --- drivers/gpu/drm/i915/selftests/i915_request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index bd5c96a77ba3..d67710d10615 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1313,7 +1313,7 @@ static int __live_parallel_engine1(void *arg) i915_request_add(rq); err = 0; - if (i915_request_wait(rq, 0, HZ / 5) < 0) + if (i915_request_wait(rq, 0, HZ) < 0) err = -ETIME; i915_request_put(rq); if (err) @@ -1419,7 +1419,7 @@ static int __live_parallel_spin(void *arg) } igt_spinner_end(&spin); - if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) + if (err == 0 && i915_request_wait(rq, 0, HZ) < 0) err = -EIO; i915_request_put(rq); -- cgit v1.2.3 From 617e87c05c72a88006b0604ce60cc4b105450016 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:45 -0700 Subject: drm/i915/selftest: Fix hangcheck self test for GuC submission When GuC submission is enabled, the GuC controls engine resets. Rather than explicitly triggering a reset, the driver must submit a hanging context to GuC and wait for the reset to occur. Conversely, one of the tests specifically sends hanging batches to the engines but wants them to sit around until a manual reset of the full GT (including GuC itself). That means disabling GuC based engine resets to prevent those from killing the hanging batch too soon. So, add support to the scheduling policy helper for disabling resets as well as making them quicker! In GuC submission mode, the 'is engine idle' test basically turns into 'is engine PM wakelock held'. Independently, there is a heartbeat disable helper function that the tests use. For unexplained reasons, this acquires the engine wakelock before disabling the heartbeat and only releases it when re-enabling the heartbeat. As one of the tests tries to do a wait for idle in the middle of a heartbeat disabled section, it is therefore guaranteed to always fail. Added a 'no_pm' variant of the heartbeat helper that allows the engine to be asleep while also having heartbeats disabled. 
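As an illustrative sketch (not patch code) of why the new helper matters: wait_for_idle() below stands in for the selftest's local idle check, which under GuC submission effectively reduces to "is the engine PM wakeref released?".

static int sketch_idle_check(struct intel_engine_cs *engine)
{
	int err = 0;

	/*
	 * Old helper: takes an engine-PM wakeref and holds it for the whole
	 * section. Under GuC submission "idle" means "no PM wakeref held",
	 * so this wait can never succeed.
	 */
	st_engine_heartbeat_disable(engine);
	if (!wait_for_idle(engine))
		err = -EIO;
	st_engine_heartbeat_enable(engine);

	/*
	 * New helper: parks the heartbeat without pinning the engine awake,
	 * so the same wait inside the disabled section can pass.
	 */
	st_engine_heartbeat_disable_no_pm(engine);
	if (!wait_for_idle(engine))
		err = -EIO;
	st_engine_heartbeat_enable_no_pm(engine);

	return err;
}

That is what __igt_reset_engines() below relies on when it switches to the _no_pm variants around its reset loop.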
Signed-off-by: John Harrison Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-31-matthew.brost@intel.com --- .../gpu/drm/i915/gt/selftest_engine_heartbeat.c | 22 ++ .../gpu/drm/i915/gt/selftest_engine_heartbeat.h | 2 + drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 227 ++++++++++++++++----- drivers/gpu/drm/i915/gt/selftest_mocs.c | 3 +- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 6 +- .../drm/i915/selftests/intel_scheduler_helpers.c | 39 ++-- .../drm/i915/selftests/intel_scheduler_helpers.h | 8 +- 7 files changed, 236 insertions(+), 71 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 4896e4ccad50..317eebf086c3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -405,3 +405,25 @@ void st_engine_heartbeat_enable(struct intel_engine_cs *engine) engine->props.heartbeat_interval_ms = engine->defaults.heartbeat_interval_ms; } + +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = 0; + + /* + * Park the heartbeat but without holding the PM lock as that + * makes the engines appear not-idle. Note that if/when unpark + * is called due to the PM lock being acquired later the + * heartbeat still won't be enabled because of the above = 0. + */ + if (intel_engine_pm_get_if_awake(engine)) { + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } +} + +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h index cd27113d5400..81da2cd8e406 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h @@ -9,6 +9,8 @@ struct intel_engine_cs; void st_engine_heartbeat_disable(struct intel_engine_cs *engine); +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine); void st_engine_heartbeat_enable(struct intel_engine_cs *engine); +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine); #endif /* SELFTEST_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index f93ba4076b2b..e0e200ba77e9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -17,6 +17,8 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" #include "selftests/mock_drm.h" @@ -450,6 +452,14 @@ static int igt_reset_nop_engine(void *arg) IGT_TIMEOUT(end_time); int err; + if (intel_engine_uses_guc(engine)) { + /* Engine level resets are triggered by GuC when a hang + * is detected. They can't be triggered by the KMD any + * more. 
Thus a nop batch cannot be used as a reset test + */ + continue; + } + ce = intel_context_create(engine); if (IS_ERR(ce)) { pr_err("[%s] Create context failed: %d!\n", engine->name, err); @@ -561,6 +571,10 @@ static int igt_reset_fail_engine(void *arg) IGT_TIMEOUT(end_time); int err; + /* Can't manually break the reset if i915 doesn't perform it */ + if (intel_engine_uses_guc(engine)) + continue; + ce = intel_context_create(engine); if (IS_ERR(ce)) { pr_err("[%s] Create context failed: %d!\n", engine->name, err); @@ -700,8 +714,12 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; unsigned long count; + bool using_guc = intel_engine_uses_guc(engine); IGT_TIMEOUT(end_time); + if (using_guc && !active) + continue; + if (active && !intel_engine_can_store_dword(engine)) continue; @@ -719,15 +737,24 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) set_bit(I915_RESET_ENGINE + id, >->reset.flags); count = 0; do { - if (active) { - struct i915_request *rq; + struct i915_request *rq = NULL; + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + break; + } + if (active) { rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); - break; + goto restore; } i915_request_get(rq); @@ -743,34 +770,59 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) i915_request_put(rq); err = -EIO; - break; + goto restore; } + } - i915_request_put(rq); + if (!using_guc) { + err = intel_engine_reset(engine, NULL); + if (err) { + pr_err("intel_engine_reset(%s) failed, err:%d\n", + engine->name, err); + goto skip; + } } - err = intel_engine_reset(engine, NULL); - if (err) { - pr_err("intel_engine_reset(%s) failed, err:%d\n", - engine->name, err); - break; + if (rq) { + /* Ensure the reset happens and kills the engine */ + err = intel_selftest_wait_for_rq(rq); + if (err) + pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", + engine->name, rq->fence.context, + rq->fence.seqno, rq->context->guc_id, err); } +skip: + if (rq) + i915_request_put(rq); + if (i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); err = -EINVAL; - break; + goto restore; } - if (i915_reset_engine_count(global, engine) != - ++reset_engine_count) { - pr_err("%s engine reset not recorded!\n", - engine->name); - err = -EINVAL; - break; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + if (i915_reset_engine_count(global, engine) != + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); + err = -EINVAL; + goto restore; + } } count++; + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err2) + pr_err("[%s] Restore policy failed: %d!\n", engine->name, err); + if (err == 0) + err = err2; + if (err) + break; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); @@ -943,10 +995,13 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; + bool using_guc = intel_engine_uses_guc(engine); IGT_TIMEOUT(end_time); - if (flags & TEST_ACTIVE && - !intel_engine_can_store_dword(engine)) + if (flags & TEST_ACTIVE) { + if (!intel_engine_can_store_dword(engine)) + continue; + } else if (using_guc) continue; if (!wait_for_idle(engine)) { @@ -986,10 +1041,19 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - st_engine_heartbeat_disable(engine); + st_engine_heartbeat_disable_no_pm(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + break; + } if (flags & TEST_ACTIVE) { rq = hang_create_request(&h, engine); @@ -997,7 +1061,7 @@ static int __igt_reset_engines(struct intel_gt *gt, err = PTR_ERR(rq); pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); - break; + goto restore; } i915_request_get(rq); @@ -1013,15 +1077,28 @@ static int __igt_reset_engines(struct intel_gt *gt, i915_request_put(rq); err = -EIO; - break; + goto restore; } + } else { + intel_engine_pm_get(engine); } - err = intel_engine_reset(engine, NULL); - if (err) { - pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", - engine->name, test_name, err); - break; + if (!using_guc) { + err = intel_engine_reset(engine, NULL); + if (err) { + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); + goto restore; + } + } + + if (rq) { + /* Ensure the reset happens and kills the engine */ + err = intel_selftest_wait_for_rq(rq); + if (err) + pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", + engine->name, rq->fence.context, + rq->fence.seqno, rq->context->guc_id, err); } count++; @@ -1037,7 +1114,7 @@ static int __igt_reset_engines(struct intel_gt *gt, GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - break; + goto restore; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -1056,12 +1133,15 @@ static int __igt_reset_engines(struct intel_gt *gt, GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - break; + goto restore; } i915_request_put(rq); } + if (!(flags & TEST_ACTIVE)) + intel_engine_pm_put(engine); + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1073,22 +1153,34 @@ static int __igt_reset_engines(struct 
intel_gt *gt, "%s\n", engine->name); err = -EIO; - break; + goto restore; } + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err2) + pr_err("[%s] Restore policy failed: %d!\n", engine->name, err2); + if (err == 0) + err = err2; + if (err) + break; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - st_engine_heartbeat_enable(engine); + st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); - reported = i915_reset_engine_count(global, engine); - reported -= threads[engine->id].resets; - if (reported != count) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, test_name, count, reported); - if (!err) - err = -EINVAL; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + reported = i915_reset_engine_count(global, engine); + reported -= threads[engine->id].resets; + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); + if (!err) + err = -EINVAL; + } } unwind: @@ -1107,15 +1199,18 @@ unwind: } put_task_struct(threads[tmp].task); - if (other->uabi_class != engine->uabi_class && - threads[tmp].resets != - i915_reset_engine_count(global, other)) { - pr_err("Innocent engine %s was reset (count=%ld)\n", - other->name, - i915_reset_engine_count(global, other) - - threads[tmp].resets); - if (!err) - err = -EINVAL; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + if (other->uabi_class != engine->uabi_class && + threads[tmp].resets != + i915_reset_engine_count(global, other)) { + pr_err("Innocent engine %s was reset (count=%ld)\n", + other->name, + i915_reset_engine_count(global, other) - + threads[tmp].resets); + if (!err) + err = -EINVAL; + } } } @@ -1555,18 +1650,29 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, gt, id) { + struct intel_selftest_saved_policy saved; struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; + bool using_guc = intel_engine_uses_guc(engine); if (!intel_engine_can_store_dword(engine)) continue; + if (using_guc) { + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + goto fini; + } + } + prev = hang_create_request(&h, engine); if (IS_ERR(prev)) { err = PTR_ERR(prev); pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err); - goto fini; + goto restore; } i915_request_get(prev); @@ -1581,7 +1687,7 @@ static int igt_reset_queue(void *arg) if (IS_ERR(rq)) { err = PTR_ERR(rq); pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); - goto fini; + goto restore; } i915_request_get(rq); @@ -1606,7 +1712,7 @@ static int igt_reset_queue(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); - goto fini; + goto restore; } if (!wait_until_running(&h, prev)) { @@ -1624,7 +1730,7 @@ static int igt_reset_queue(void *arg) intel_gt_set_wedged(gt); err = -EIO; - goto fini; + goto restore; } reset_count = fake_hangcheck(gt, BIT(id)); @@ -1635,7 +1741,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } if (rq->fence.error) { @@ -1644,7 +1750,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } if (i915_reset_count(global) == 
reset_count) { @@ -1652,7 +1758,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } i915_request_put(prev); @@ -1667,6 +1773,19 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); +restore: + if (using_guc) { + int err2 = intel_selftest_restore_policy(engine, &saved); + + if (err2) + pr_err("%s:%d> [%s] Restore policy failed: %d!\n", + __func__, __LINE__, engine->name, err2); + if (err == 0) + err = err2; + } + if (err) + goto fini; + err = igt_flush_test(gt->i915); if (err) { pr_err("[%s] Flush failed: %d!\n", engine->name, err); diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index b7314739ee40..13d25bf2a94a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -408,7 +408,8 @@ static int live_mocs_reset(void *arg) struct intel_context *ce; int err2; - err = intel_selftest_modify_policy(engine, &saved); + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); if (err) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index ba7ee69414d5..e623ac45f4aa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -810,7 +810,8 @@ static int live_reset_whitelist(void *arg) struct intel_selftest_saved_policy saved; int err2; - err = intel_selftest_modify_policy(engine, &saved); + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); if (err) goto out; @@ -1278,7 +1279,8 @@ live_engine_reset_workarounds(void *arg) int ret2; pr_info("Verifying after %s reset...\n", engine->name); - ret = intel_selftest_modify_policy(engine, &saved); + ret = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); if (ret) break; diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index 5cdee1378e98..dac275e835c4 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -15,7 +15,8 @@ #define WAIT_FOR_RESET_TIME 1000 int intel_selftest_modify_policy(struct intel_engine_cs *engine, - struct intel_selftest_saved_policy *saved) + struct intel_selftest_saved_policy *saved, + u32 modify_type) { int err; @@ -25,18 +26,30 @@ int intel_selftest_modify_policy(struct intel_engine_cs *engine, saved->timeslice = engine->props.timeslice_duration_ms; saved->preempt_timeout = engine->props.preempt_timeout_ms; - /* - * Enable force pre-emption on time slice expiration - * together with engine reset on pre-emption timeout. - * This is required to make the GuC notice and reset - * the single hanging context. - * Also, reduce the preemption timeout to something - * small to speed the test up. - */ - engine->i915->params.reset = 2; - engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION; - engine->props.timeslice_duration_ms = REDUCED_TIMESLICE; - engine->props.preempt_timeout_ms = REDUCED_PREEMPT; + switch (modify_type) { + case SELFTEST_SCHEDULER_MODIFY_FAST_RESET: + /* + * Enable force pre-emption on time slice expiration + * together with engine reset on pre-emption timeout. + * This is required to make the GuC notice and reset + * the single hanging context. + * Also, reduce the preemption timeout to something + * small to speed the test up. 
+ */ + engine->i915->params.reset = 2; + engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION; + engine->props.timeslice_duration_ms = REDUCED_TIMESLICE; + engine->props.preempt_timeout_ms = REDUCED_PREEMPT; + break; + + case SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK: + engine->props.preempt_timeout_ms = 0; + break; + + default: + pr_err("Invalid scheduler policy modification type: %d!\n", modify_type); + return -EINVAL; + } if (!intel_engine_uses_guc(engine)) return 0; diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h index 79605b14bc33..35c098601ac0 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -18,8 +18,14 @@ struct intel_selftest_saved_policy { u64 preempt_timeout; }; +enum selftest_scheduler_modify { + SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK = 0, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET, +}; + int intel_selftest_modify_policy(struct intel_engine_cs *engine, - struct intel_selftest_saved_policy *saved); + struct intel_selftest_saved_policy *saved, + enum selftest_scheduler_modify modify_type); int intel_selftest_restore_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved); int intel_selftest_wait_for_rq(struct i915_request *rq); -- cgit v1.2.3 From 3a7b72665ea5dcfa24efe857dbb88fe135ae21a6 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 26 Jul 2021 17:23:46 -0700 Subject: drm/i915/selftest: Bump selftest timeouts for hangcheck Some testing environments and some heavier tests are slower than previous limits allowed for. For example, it can take multiple seconds for the 'context has been reset' notification handler to reach the 'kill the requests' code in the 'active' version of the 'reset engines' test. During which time the selftest gets bored, gives up waiting and fails the test. There is also an async thread that the selftest uses to pump work through the hardware in parallel to the context that is marked for reset. That also could get bored waiting for completions and kill the test off. Lastly, the flush at the end of various test sections can also see timeouts due to the large amount of work backed up. This is also true of the live_hwsp_read test.
Signed-off-by: John Harrison Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-32-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 2 +- drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index e0e200ba77e9..08f011f893b2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -879,7 +879,7 @@ static int active_request_put(struct i915_request *rq) if (!rq) return 0; - if (i915_request_wait(rq, 0, 5 * HZ) < 0) { + if (i915_request_wait(rq, 0, 10 * HZ) < 0) { GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", rq->engine->name, rq->fence.context, diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 7b0939e3f007..a6c71fca61aa 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915) cond_resched(); - if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index dac275e835c4..4b328346b48a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -12,7 +12,7 @@ #define REDUCED_TIMESLICE 5 #define REDUCED_PREEMPT 10 -#define WAIT_FOR_RESET_TIME 1000 +#define WAIT_FOR_RESET_TIME 10000 int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, -- cgit v1.2.3 From ee242ca704d386991d7ece0c46134e211d52412b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:47 -0700 Subject: drm/i915/guc: Implement GuC priority management Implement a simple static mapping algorithm of the i915 priority levels (int, -1k to 1k exposed to user) to the 4 GuC levels. Mapping is as follows: i915 level < 0 -> GuC low level (3) i915 level == 0 -> GuC normal level (2) i915 level < INT_MAX -> GuC high level (1) i915 level == INT_MAX -> GuC highest level (0) We believe this mapping should cover the UMD use cases (3 distinct user levels + 1 kernel level). In addition to static mapping, a simple counter system is attached to each context tracking the number of requests inflight on the context at each level. This is needed as the GuC levels are per context while in the i915 levels are per request. 
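Expressed as code, that static map is a four-way comparison. The patch implements it as map_i915_prio_to_guc_prio() in terms of the driver's priority constants; the sketch below uses an invented name and the raw GuC levels from the list above, and assumes the usual kernel type headers:

/* Sketch of the i915 -> GuC priority map described above (lower GuC value
 * means higher priority); map_i915_prio_to_guc_prio() in the patch is the
 * real implementation.
 */
static u8 sketch_map_prio(int i915_prio)
{
	if (i915_prio < 0)
		return 3;	/* GUC_CLIENT_PRIORITY_NORMAL: low */
	if (i915_prio == 0)
		return 2;	/* GUC_CLIENT_PRIORITY_KMD_NORMAL: normal */
	if (i915_prio < INT_MAX)
		return 1;	/* GUC_CLIENT_PRIORITY_HIGH: high */
	return 0;		/* GUC_CLIENT_PRIORITY_KMD_HIGH: highest */
}

On top of the static map, each context keeps a small guc_prio_count[] histogram of its in-flight requests, and update_context_prio() pushes the highest occupied level (lowest index) to the GuC whenever it changes.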
v2: (Daniele) - Add BUILD_BUG_ON to enforce ordering of priority levels - Add missing lockdep to guc_prio_fini - Check for return before setting context registered flag - Map DISPLAY priority or higher to highest guc prio - Update comment for guc_prio Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-33-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 3 + drivers/gpu/drm/i915/gt/intel_context_types.h | 9 +- drivers/gpu/drm/i915/gt/intel_engine_user.c | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 212 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_request.c | 5 + drivers/gpu/drm/i915/i915_request.h | 9 + drivers/gpu/drm/i915/i915_scheduler.c | 7 + drivers/gpu/drm/i915/i915_scheduler_types.h | 12 ++ drivers/gpu/drm/i915/i915_trace.h | 17 +- include/uapi/drm/i915_drm.h | 9 + 10 files changed, 282 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 2007dc6f6b99..209cf265bf74 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -245,6 +245,9 @@ static void signal_irq_work(struct irq_work *work) llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; + if (rq->engine->sched_engine->retire_inflight_request_prio) + rq->engine->sched_engine->retire_inflight_request_prio(rq); + spin_lock(&rq->lock); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index a5bc876face7..e54351a170e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -18,8 +18,9 @@ #include "intel_engine_types.h" #include "intel_sseu.h" -#define CONTEXT_REDZONE POISON_INUSE +#include "uc/intel_guc_fwif.h" +#define CONTEXT_REDZONE POISON_INUSE DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; @@ -191,6 +192,12 @@ struct intel_context { /* GuC context blocked fence */ struct i915_sw_fence guc_blocked; + + /* + * GuC priority management + */ + u8 guc_prio; + u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 84142127ebd8..8f8bea08e734 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -11,6 +11,7 @@ #include "intel_engine.h" #include "intel_engine_user.h" #include "intel_gt.h" +#include "uc/intel_guc_submission.h" struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) @@ -115,6 +116,9 @@ static void set_scheduler_caps(struct drm_i915_private *i915) disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; + for (i = 0; i < ARRAY_SIZE(map); i++) { if (engine->flags & BIT(map[i].engine)) enabled |= BIT(map[i].sched); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 3ff42d6e934f..b760cbf6ca0e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -81,6 +81,7 @@ 
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); */ #define SCHED_STATE_NO_LOCK_ENABLED BIT(0) #define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) +#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) static inline bool context_enabled(struct intel_context *ce) { return (atomic_read(&ce->guc_sched_state_no_lock) & @@ -116,6 +117,24 @@ static inline void clr_context_pending_enable(struct intel_context *ce) &ce->guc_sched_state_no_lock); } +static inline bool context_registered(struct intel_context *ce) +{ + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_REGISTERED); +} + +static inline void set_context_registered(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); +} + /* * Below is a set of functions which control the GuC scheduling state which * require a lock, aside from the special case where the functions are called @@ -1092,6 +1111,7 @@ static int steal_guc_id(struct intel_guc *guc) list_del_init(&ce->guc_id_link); guc_id = ce->guc_id; + clr_context_registered(ce); set_context_guc_id_invalid(ce); return guc_id; } else { @@ -1201,10 +1221,15 @@ static int register_context(struct intel_context *ce, bool loop) struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + ce->guc_id * sizeof(struct guc_lrc_desc); + int ret; trace_intel_context_register(ce); - return __guc_action_register_context(guc, ce->guc_id, offset, loop); + ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); + if (likely(!ret)) + set_context_registered(ce); + + return ret; } static int __guc_action_deregister_context(struct intel_guc *guc, @@ -1260,6 +1285,8 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } +static inline u8 map_i915_prio_to_guc_prio(int prio); + static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; @@ -1267,6 +1294,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct intel_guc *guc = &engine->gt->uc.guc; u32 desc_idx = ce->guc_id; struct guc_lrc_desc *desc; + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; bool context_registered; intel_wakeref_t wakeref; int ret = 0; @@ -1282,6 +1311,12 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) context_registered = lrc_desc_registered(guc, desc_idx); + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + reset_lrc_desc(guc, desc_idx); set_lrc_desc_registered(guc, desc_idx, ce); @@ -1290,7 +1325,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; + ce->guc_prio = map_i915_prio_to_guc_prio(prio); + desc->priority = ce->guc_prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); init_sched_state(ce); @@ -1693,11 +1729,17 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); GEM_BUG_ON(context_enabled(ce)); + clr_context_registered(ce); deregister_context(ce, ce->guc_id, true); } 
static void __guc_context_destroy(struct intel_context *ce) { + GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + lrc_fini(ce); intel_context_fini(ce); @@ -1791,15 +1833,124 @@ static int guc_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void guc_context_set_prio(struct intel_guc *guc, + struct intel_context *ce, + u8 prio) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, + ce->guc_id, + prio, + }; + + GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || + prio > GUC_CLIENT_PRIORITY_NORMAL); + + if (ce->guc_prio == prio || submission_disabled(guc) || + !context_registered(ce)) + return; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + + ce->guc_prio = prio; + trace_intel_context_set_prio(ce); +} + +static inline u8 map_i915_prio_to_guc_prio(int prio) +{ + if (prio == I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_KMD_NORMAL; + else if (prio < I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_NORMAL; + else if (prio < I915_PRIORITY_DISPLAY) + return GUC_CLIENT_PRIORITY_HIGH; + else + return GUC_CLIENT_PRIORITY_KMD_HIGH; +} + +static inline void add_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + ++ce->guc_prio_count[guc_prio]; + + /* Overflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); +} + +static inline void sub_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + /* Underflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + + --ce->guc_prio_count[guc_prio]; +} + +static inline void update_context_prio(struct intel_context *ce) +{ + struct intel_guc *guc = &ce->engine->gt->uc.guc; + int i; + + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); + + lockdep_assert_held(&ce->guc_active.lock); + + for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { + if (ce->guc_prio_count[i]) { + guc_context_set_prio(guc, ce, i); + break; + } + } +} + +static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) +{ + /* Lower value is higher priority */ + return new_guc_prio < old_guc_prio; +} + static void add_to_context(struct i915_request *rq) { struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); + + GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); spin_lock(&ce->guc_active.lock); list_move_tail(&rq->sched.link, &ce->guc_active.requests); + + if (rq->guc_prio == GUC_PRIO_INIT) { + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } + update_context_prio(ce); + spin_unlock(&ce->guc_active.lock); } +static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_active.lock); + + if (rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI) { + sub_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + rq->guc_prio = GUC_PRIO_FINI; +} + static void remove_from_context(struct 
i915_request *rq) { struct intel_context *ce = rq->context; @@ -1812,6 +1963,8 @@ static void remove_from_context(struct i915_request *rq) /* Prevent further __await_execution() registering a cb, then flush */ set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + guc_prio_fini(rq, ce); + spin_unlock_irq(&ce->guc_active.lock); atomic_dec(&ce->guc_id_ref); @@ -2093,6 +2246,39 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine) } } +static void guc_bump_inflight_request_prio(struct i915_request *rq, + int prio) +{ + struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); + + /* Short circuit function */ + if (prio < I915_PRIORITY_NORMAL || + rq->guc_prio == GUC_PRIO_FINI || + (rq->guc_prio != GUC_PRIO_INIT && + !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) + return; + + spin_lock(&ce->guc_active.lock); + if (rq->guc_prio != GUC_PRIO_FINI) { + if (rq->guc_prio != GUC_PRIO_INIT) + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + spin_unlock(&ce->guc_active.lock); +} + +static void guc_retire_inflight_request_prio(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock(&ce->guc_active.lock); + guc_prio_fini(rq, ce); + spin_unlock(&ce->guc_active.lock); +} + static void sanitize_hwsp(struct intel_engine_cs *engine) { struct intel_timeline *tl; @@ -2317,6 +2503,10 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) guc->sched_engine->disabled = guc_sched_engine_disabled; guc->sched_engine->private_data = guc; guc->sched_engine->destroy = guc_sched_engine_destroy; + guc->sched_engine->bump_inflight_request_prio = + guc_bump_inflight_request_prio; + guc->sched_engine->retire_inflight_request_prio = + guc_retire_inflight_request_prio; tasklet_setup(&guc->sched_engine->tasklet, guc_submission_tasklet); } @@ -2694,6 +2884,22 @@ void intel_guc_submission_print_info(struct intel_guc *guc, drm_printf(p, "\n"); } +static inline void guc_log_context_priority(struct drm_printer *p, + struct intel_context *ce) +{ + int i; + + drm_printf(p, "\t\tPriority: %d\n", + ce->guc_prio); + drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); + for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; + i < GUC_CLIENT_PRIORITY_NUM; ++i) { + drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", + i, ce->guc_prio_count[i]); + } + drm_printf(p, "\n"); +} + void intel_guc_submission_print_context_info(struct intel_guc *guc, struct drm_printer *p) { @@ -2716,6 +2922,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", ce->guc_state.sched_state, atomic_read(&ce->guc_sched_state_no_lock)); + + guc_log_context_priority(p, ce); } } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 541a20371502..1f1d4a6a0eff 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -114,6 +114,9 @@ static void i915_fence_release(struct dma_fence *fence) { struct i915_request *rq = to_request(fence); + GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. 
@@ -924,6 +927,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ + rq->guc_prio = GUC_PRIO_INIT; + /* We bump the ref for the fence chain */ i915_sw_fence_reinit(&i915_request_get(rq)->submit); i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ac0e3326c067..e6a0e0ebc9aa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -293,6 +293,15 @@ struct i915_request { */ struct list_head guc_fence_link; + /** + * Priority level while the request is inflight. Differs from i915 + * scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + */ +#define GUC_PRIO_INIT 0xff +#define GUC_PRIO_FINI 0xfe + u8 guc_prio; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 28dd887eb1be..17843c204356 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -241,6 +241,9 @@ static void __i915_schedule(struct i915_sched_node *node, /* Fifo and depth-first replacement ensure our deps execute before us */ sched_engine = lock_sched_engine(node, sched_engine, &cache); list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_request *from = container_of(dep->signaler, + struct i915_request, + sched); INIT_LIST_HEAD(&dep->dfs_link); node = dep->signaler; @@ -254,6 +257,10 @@ static void __i915_schedule(struct i915_sched_node *node, GEM_BUG_ON(node_to_request(node)->engine->sched_engine != sched_engine); + /* Must be called before changing the nodes priority */ + if (sched_engine->bump_inflight_request_prio) + sched_engine->bump_inflight_request_prio(from, prio); + WRITE_ONCE(node->attr.priority, prio); /* diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index eaef233e9080..b0a1b58c7893 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -179,6 +179,18 @@ struct i915_sched_engine { void (*kick_backend)(const struct i915_request *rq, int prio); + /** + * @bump_inflight_request_prio: update priority of an inflight request + */ + void (*bump_inflight_request_prio)(struct i915_request *rq, + int prio); + + /** + * @retire_inflight_request_prio: indicate request is retired to + * priority tracking + */ + void (*retire_inflight_request_prio)(struct i915_request *rq); + /** * @schedule: adjust priority of request * diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 9613a7c19661..806ad688274b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -904,6 +904,7 @@ DECLARE_EVENT_CLASS(intel_context, __field(int, pin_count) __field(u32, sched_state) __field(u32, guc_sched_state_no_lock) + __field(u8, guc_prio) ), TP_fast_assign( @@ -912,12 +913,19 @@ DECLARE_EVENT_CLASS(intel_context, __entry->sched_state = ce->guc_state.sched_state; __entry->guc_sched_state_no_lock = atomic_read(&ce->guc_sched_state_no_lock); + __entry->guc_prio = ce->guc_prio; ), - TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x", + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", __entry->guc_id, __entry->pin_count, __entry->sched_state, - __entry->guc_sched_state_no_lock) + __entry->guc_sched_state_no_lock, + 
__entry->guc_prio) +); + +DEFINE_EVENT(intel_context, intel_context_set_prio, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) ); DEFINE_EVENT(intel_context, intel_context_reset, @@ -1017,6 +1025,11 @@ trace_i915_request_out(struct i915_request *rq) { } +static inline void +trace_intel_context_set_prio(struct intel_context *ce) +{ +} + static inline void trace_intel_context_reset(struct intel_context *ce) { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 975087553ea0..7f13d241417f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 -- cgit v1.2.3 From e754dccbc908701bf412378c56824409468152a4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 26 Jul 2021 17:23:48 -0700 Subject: drm/i915/guc: Unblock GuC submission on Gen11+ Unblock GuC submission on Gen11+ platforms. v2: (Martin Peres / John H) - Delete debug message when GuC is disabled by default on certain platforms Signed-off-by: Michal Wajdeczko Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-34-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h | 3 +-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 ++++++++----- 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8ab70a2223b0..a9547069ee7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -55,6 +55,7 @@ struct intel_guc { struct ida guc_ids; struct list_head guc_id_list; + bool submission_supported; bool submission_selected; struct i915_vma *ads_vma; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b760cbf6ca0e..89ff0e4b4bc7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2543,6 +2543,13 @@ void intel_guc_submission_disable(struct intel_guc *guc) /* Note: By the time we're here, GuC may have already been reset */ } +static bool __guc_submission_supported(struct intel_guc *guc) +{ + /* GuC submission is unavailable for pre-Gen11 */ + return intel_guc_is_supported(guc) && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; +} + static bool __guc_submission_selected(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; @@ -2555,6 +2562,7 @@ static bool __guc_submission_selected(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { + guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 03bc1c83a4d2..c7ef44fa0c36 100644 
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -38,8 +38,7 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc, static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { - /* XXX: GuC submission is unavailable for now */ - return false; + return guc->submission_supported; } static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 7a69c3c027e9..da57d18d9f6b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -34,8 +34,14 @@ static void uc_expand_default_options(struct intel_uc *uc) return; } - /* Default: enable HuC authentication only */ - i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; + /* Intermediate platforms are HuC authentication only */ + if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) { + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; + return; + } + + /* Default: enable HuC authentication and GuC submission */ + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION; } /* Reset GuC providing us with fresh state for both GuC and HuC. @@ -313,9 +319,6 @@ static int __uc_init(struct intel_uc *uc) if (i915_inject_probe_failure(uc_to_gt(uc)->i915)) return -ENOMEM; - /* XXX: GuC submission is unavailable for now */ - GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); - ret = intel_guc_init(guc); if (ret) return ret; -- cgit v1.2.3 From e43c5261a654ff9c9279dc11a8deadb8c2c3e0f7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 23 Jul 2021 10:42:10 -0700 Subject: drm/i915/xehpsdv: Correct parameters for IS_XEHPSDV_GT_STEP() During a rebase the parameters were partially renamed, but not completely. Since the subsequent patches that start using this macro haven't landed on an upstream tree yet this didn't cause a build failure. Fixes: 086df54e20be ("drm/i915/xehpsdv: add initial XeHP SDV definitions") Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210723174239.1551352-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5c82a8012332..d22cea642627 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1560,8 +1560,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_ALDERLAKE_P(__i915) && \ IS_GT_STEP(__i915, since, until)) -#define IS_XEHPSDV_GT_STEP(p, since, until) \ - (IS_XEHPSDV(p) && IS_GT_STEP(__i915, since, until)) +#define IS_XEHPSDV_GT_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until)) /* * DG2 hardware steppings are a bit unusual. The hardware design was forked -- cgit v1.2.3 From 6d5de3275609c6022d6677808968b7adcdee5e66 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:27 +0200 Subject: drm/i915: Check for nomodeset in i915_init() first When modesetting (aka the full pci driver, which has nothing to do with disable_display option, which just gives you the full pci driver without the display driver) is disabled, we load nothing and do nothing. So move that check first, for a bit of orderliness. With Jason's module init/exit table this now becomes trivial. 
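To see why the check has to come first, consider a small stand-alone model of the table-driven scheme (plain userspace C, illustrative only; the function names are made up and are not the kernel hooks). An init entry returning a positive value is treated as an early-exit success, so nothing after the nomodeset-style check is ever initialized, while a negative return unwinds whatever already ran.

	#include <stdio.h>

	/* Stand-ins for the real init/exit hooks; names are illustrative only. */
	static int check_disabled(void) { return 1; }	/* >0 means early-exit success */
	static int init_a(void) { puts("init a"); return 0; }
	static void exit_a(void) { puts("exit a"); }
	static int init_b(void) { puts("init b"); return 0; }
	static void exit_b(void) { puts("exit b"); }

	static const struct {
		int (*init)(void);
		void (*exit)(void);
	} init_funcs[] = {
		{ check_disabled, NULL },	/* must come first: needs no teardown */
		{ init_a, exit_a },
		{ init_b, exit_b },
	};

	int main(void)
	{
		unsigned int i;

		for (i = 0; i < sizeof(init_funcs) / sizeof(init_funcs[0]); i++) {
			int err = init_funcs[i].init();

			if (err < 0) {		/* failure: unwind what already ran */
				while (i--)
					if (init_funcs[i].exit)
						init_funcs[i].exit();
				return 1;
			} else if (err > 0) {	/* early-exit success: stop loading */
				break;
			}
		}
		return 0;
	}

Compiled and run as-is, this prints nothing, because check_disabled() short-circuits the whole table before any of the other entries get a chance to allocate anything.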
Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 94b5418a86c7..dbaa6d5006fb 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1258,9 +1258,9 @@ static const struct { int (*init)(void); void (*exit)(void); } init_funcs[] = { + { i915_check_nomodeset, NULL }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, - { i915_check_nomodeset, NULL }, { i915_pmu_init, i915_pmu_exit }, { i915_register_pci_driver, i915_unregister_pci_driver }, { i915_perf_sysctl_register, i915_perf_sysctl_unregister }, -- cgit v1.2.3 From 512ba03e35ccb2897d19d0207ef6bd55a9564fd1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:28 +0200 Subject: drm/i915: move i915_active slab to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_cache to just a slab_cache. v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_active.c | 31 +++++++++++-------------------- drivers/gpu/drm/i915/i915_active.h | 3 +++ drivers/gpu/drm/i915/i915_globals.c | 2 -- drivers/gpu/drm/i915/i915_globals.h | 1 - drivers/gpu/drm/i915/i915_pci.c | 2 ++ 5 files changed, 16 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 91723123ae9f..3103c1e1fd14 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -13,7 +13,6 @@ #include "i915_drv.h" #include "i915_active.h" -#include "i915_globals.h" /* * Active refs memory management @@ -22,10 +21,7 @@ * they idle (when we know the active requests are inactive) and allocate the * nodes from a local slab cache to hopefully reduce the fragmentation. */ -static struct i915_global_active { - struct i915_global base; - struct kmem_cache *slab_cache; -} global; +static struct kmem_cache *slab_cache; struct active_node { struct rb_node node; @@ -174,7 +170,7 @@ __active_retire(struct i915_active *ref) /* Finally free the discarded timeline tree */ rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_fence_isset(&it->base)); - kmem_cache_free(global.slab_cache, it); + kmem_cache_free(slab_cache, it); } } @@ -322,7 +318,7 @@ active_instance(struct i915_active *ref, u64 idx) * XXX: We should preallocate this before i915_active_ref() is ever * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC. 
*/ - node = kmem_cache_alloc(global.slab_cache, GFP_ATOMIC); + node = kmem_cache_alloc(slab_cache, GFP_ATOMIC); if (!node) goto out; @@ -788,7 +784,7 @@ void i915_active_fini(struct i915_active *ref) mutex_destroy(&ref->mutex); if (ref->cache) - kmem_cache_free(global.slab_cache, ref->cache); + kmem_cache_free(slab_cache, ref->cache); } static inline bool is_idle_barrier(struct active_node *node, u64 idx) @@ -908,7 +904,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node = reuse_idle_barrier(ref, idx); rcu_read_unlock(); if (!node) { - node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + node = kmem_cache_alloc(slab_cache, GFP_KERNEL); if (!node) goto unwind; @@ -956,7 +952,7 @@ unwind: atomic_dec(&ref->count); intel_engine_pm_put(barrier_to_engine(node)); - kmem_cache_free(global.slab_cache, node); + kmem_cache_free(slab_cache, node); } return -ENOMEM; } @@ -1176,21 +1172,16 @@ struct i915_active *i915_active_create(void) #include "selftests/i915_active.c" #endif -static void i915_global_active_exit(void) +void i915_active_module_exit(void) { - kmem_cache_destroy(global.slab_cache); + kmem_cache_destroy(slab_cache); } -static struct i915_global_active global = { { - .exit = i915_global_active_exit, -} }; - -int __init i915_global_active_init(void) +int __init i915_active_module_init(void) { - global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); - if (!global.slab_cache) + slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); + if (!slab_cache) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index d0feda68b874..5fcdb0e2bc9e 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -247,4 +247,7 @@ static inline int __i915_request_await_exclusive(struct i915_request *rq, return err; } +void i915_active_module_exit(void); +int i915_active_module_init(void); + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 91198f5b0a06..a53135ee831d 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -7,7 +7,6 @@ #include #include -#include "i915_active.h" #include "i915_buddy.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" @@ -34,7 +33,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_active_init, i915_global_buddy_init, i915_global_context_init, i915_global_gem_context_init, diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index 9e6b4fd07528..d80901ba75e3 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -23,7 +23,6 @@ int i915_globals_init(void); void i915_globals_exit(void); /* constructors */ -int i915_global_active_init(void); int i915_global_context_init(void); int i915_global_gem_context_init(void); int i915_global_objects_init(void); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index dbaa6d5006fb..930dd93de7b0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -30,6 +30,7 @@ #include "display/intel_fbdev.h" +#include "i915_active.h" #include "i915_drv.h" #include "i915_perf.h" #include "i915_globals.h" @@ -1259,6 +1260,7 @@ static const struct { void (*exit)(void); } init_funcs[] = { { i915_check_nomodeset, NULL }, + { i915_active_module_init, i915_active_module_exit }, { 
i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, -- cgit v1.2.3 From a28beb344bb1f02b1306ce3ceaecbf2afe3e42cd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:29 +0200 Subject: drm/i915: move i915_buddy slab to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_blocks to just a slab_blocks. v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_buddy.c | 25 ++++++++----------------- drivers/gpu/drm/i915/i915_buddy.h | 3 ++- drivers/gpu/drm/i915/i915_globals.c | 2 -- drivers/gpu/drm/i915/i915_pci.c | 2 ++ 4 files changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index caabcaea3be7..7b274c51cac0 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -8,13 +8,9 @@ #include "i915_buddy.h" #include "i915_gem.h" -#include "i915_globals.h" #include "i915_utils.h" -static struct i915_global_buddy { - struct i915_global base; - struct kmem_cache *slab_blocks; -} global; +static struct kmem_cache *slab_blocks; static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, struct i915_buddy_block *parent, @@ -25,7 +21,7 @@ static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); - block = kmem_cache_zalloc(global.slab_blocks, GFP_KERNEL); + block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL); if (!block) return NULL; @@ -40,7 +36,7 @@ static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, static void i915_block_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block) { - kmem_cache_free(global.slab_blocks, block); + kmem_cache_free(slab_blocks, block); } static void mark_allocated(struct i915_buddy_block *block) @@ -410,21 +406,16 @@ err_free: #include "selftests/i915_buddy.c" #endif -static void i915_global_buddy_exit(void) +void i915_buddy_module_exit(void) { - kmem_cache_destroy(global.slab_blocks); + kmem_cache_destroy(slab_blocks); } -static struct i915_global_buddy global = { { - .exit = i915_global_buddy_exit, -} }; - -int __init i915_global_buddy_init(void) +int __init i915_buddy_module_init(void) { - global.slab_blocks = KMEM_CACHE(i915_buddy_block, 0); - if (!global.slab_blocks) + slab_blocks = KMEM_CACHE(i915_buddy_block, 0); + if (!slab_blocks) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h index d8f26706de52..3940d632f208 100644 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -129,6 +129,7 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); -int i915_global_buddy_init(void); +void i915_buddy_module_exit(void); +int i915_buddy_module_init(void); #endif diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index a53135ee831d..3de7cf22ec76 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ 
-7,7 +7,6 @@ #include #include -#include "i915_buddy.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" #include "i915_globals.h" @@ -33,7 +32,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_buddy_init, i915_global_context_init, i915_global_gem_context_init, i915_global_objects_init, diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 930dd93de7b0..ddc4ba1cd421 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -31,6 +31,7 @@ #include "display/intel_fbdev.h" #include "i915_active.h" +#include "i915_buddy.h" #include "i915_drv.h" #include "i915_perf.h" #include "i915_globals.h" @@ -1261,6 +1262,7 @@ static const struct { } init_funcs[] = { { i915_check_nomodeset, NULL }, { i915_active_module_init, i915_active_module_exit }, + { i915_buddy_module_init, i915_buddy_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, -- cgit v1.2.3 From 2dcec7d3fe536d6b9088453377252b8aeb5cc816 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:30 +0200 Subject: drm/i915: move intel_context slab to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_ce to just a slab_ce. v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-4-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gt/intel_context.c | 25 ++++++++----------------- drivers/gpu/drm/i915/gt/intel_context.h | 3 +++ drivers/gpu/drm/i915/i915_globals.c | 2 -- drivers/gpu/drm/i915/i915_globals.h | 1 - drivers/gpu/drm/i915/i915_pci.c | 2 ++ 5 files changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 477c42d7d693..745e84c72c90 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -7,7 +7,6 @@ #include "gem/i915_gem_pm.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_trace.h" #include "intel_context.h" @@ -15,14 +14,11 @@ #include "intel_engine_pm.h" #include "intel_ring.h" -static struct i915_global_context { - struct i915_global base; - struct kmem_cache *slab_ce; -} global; +static struct kmem_cache *slab_ce; static struct intel_context *intel_context_alloc(void) { - return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); + return kmem_cache_zalloc(slab_ce, GFP_KERNEL); } static void rcu_context_free(struct rcu_head *rcu) @@ -30,7 +26,7 @@ static void rcu_context_free(struct rcu_head *rcu) struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); trace_intel_context_free(ce); - kmem_cache_free(global.slab_ce, ce); + kmem_cache_free(slab_ce, ce); } void intel_context_free(struct intel_context *ce) @@ -426,22 +422,17 @@ void intel_context_fini(struct intel_context *ce) i915_active_fini(&ce->active); } -static void i915_global_context_exit(void) +void i915_context_module_exit(void) { - kmem_cache_destroy(global.slab_ce); + kmem_cache_destroy(slab_ce); } -static struct i915_global_context global = { { - .exit = i915_global_context_exit, -} }; - -int __init i915_global_context_init(void) +int __init 
i915_context_module_init(void) { - global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); - if (!global.slab_ce) + slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); + if (!slab_ce) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 876bdb08303c..c41098950746 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -31,6 +31,9 @@ void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); +void i915_context_module_exit(void); +int i915_context_module_init(void); + struct intel_context * intel_context_create(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 3de7cf22ec76..d36eb7dc40aa 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -7,7 +7,6 @@ #include #include -#include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" #include "i915_globals.h" #include "i915_request.h" @@ -32,7 +31,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_context_init, i915_global_gem_context_init, i915_global_objects_init, i915_global_request_init, diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index d80901ba75e3..60daa738a188 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -23,7 +23,6 @@ int i915_globals_init(void); void i915_globals_exit(void); /* constructors */ -int i915_global_context_init(void); int i915_global_gem_context_init(void); int i915_global_objects_init(void); int i915_global_request_init(void); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ddc4ba1cd421..16e74f04a7c7 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -33,6 +33,7 @@ #include "i915_active.h" #include "i915_buddy.h" #include "i915_drv.h" +#include "gem/i915_gem_context.h" #include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1263,6 +1264,7 @@ static const struct { { i915_check_nomodeset, NULL }, { i915_active_module_init, i915_active_module_exit }, { i915_buddy_module_init, i915_buddy_module_exit }, + { i915_context_module_init, i915_context_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, -- cgit v1.2.3 From a6270d1d4ceff6ac781d762c5290ce7335e3890b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:31 +0200 Subject: drm/i915: move gem_context slab to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_luts to just a slab_luts. 
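Each conversion in this run of patches has the same per-file shape. A minimal sketch, using a hypothetical struct foo cache rather than any real i915 type: the global.slab_* indirection and the i915_global_register() call go away, and the cache becomes a file-local static with a plain init/exit pair.

	#include <linux/init.h>
	#include <linux/slab.h>

	struct foo { int dummy; };	/* placeholder payload, assumption only */

	static struct kmem_cache *slab_foo;

	int __init i915_foo_module_init(void)
	{
		slab_foo = KMEM_CACHE(foo, SLAB_HWCACHE_ALIGN);
		if (!slab_foo)
			return -ENOMEM;

		return 0;
	}

	void i915_foo_module_exit(void)
	{
		kmem_cache_destroy(slab_foo);
	}

The matching { i915_foo_module_init, i915_foo_module_exit } row is then wired into the init_funcs[] table, which is what the i915_pci.c hunk in each of these patches does for the real caches.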
v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-5-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 25 ++++++++----------------- drivers/gpu/drm/i915/gem/i915_gem_context.h | 3 +++ drivers/gpu/drm/i915/i915_globals.c | 2 -- drivers/gpu/drm/i915/i915_globals.h | 1 - drivers/gpu/drm/i915/i915_pci.c | 2 ++ 5 files changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 05c3ee191710..cff72679ad7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -78,25 +78,21 @@ #include "gt/intel_ring.h" #include "i915_gem_context.h" -#include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static struct i915_global_gem_context { - struct i915_global base; - struct kmem_cache *slab_luts; -} global; +static struct kmem_cache *slab_luts; struct i915_lut_handle *i915_lut_handle_alloc(void) { - return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); + return kmem_cache_alloc(slab_luts, GFP_KERNEL); } void i915_lut_handle_free(struct i915_lut_handle *lut) { - return kmem_cache_free(global.slab_luts, lut); + return kmem_cache_free(slab_luts, lut); } static void lut_close(struct i915_gem_context *ctx) @@ -2283,21 +2279,16 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) #include "selftests/i915_gem_context.c" #endif -static void i915_global_gem_context_exit(void) +void i915_gem_context_module_exit(void) { - kmem_cache_destroy(global.slab_luts); + kmem_cache_destroy(slab_luts); } -static struct i915_global_gem_context global = { { - .exit = i915_global_gem_context_exit, -} }; - -int __init i915_global_gem_context_init(void) +int __init i915_gem_context_module_init(void) { - global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); - if (!global.slab_luts) + slab_luts = KMEM_CACHE(i915_lut_handle, 0); + if (!slab_luts) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 20411db84914..18060536b0c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -224,6 +224,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); for (i915_gem_engines_iter_init(&(it), (engines)); \ ((ce) = i915_gem_engines_iter_next(&(it)));) +void i915_gem_context_module_exit(void); +int i915_gem_context_module_init(void); + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index d36eb7dc40aa..dbb3d81eeea7 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -7,7 +7,6 @@ #include #include -#include "gem/i915_gem_object.h" #include "i915_globals.h" #include "i915_request.h" #include "i915_scheduler.h" @@ -31,7 +30,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_gem_context_init, i915_global_objects_init, i915_global_request_init, i915_global_scheduler_init, diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index 60daa738a188..f16752dbbdbf 100644 --- 
a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -23,7 +23,6 @@ int i915_globals_init(void); void i915_globals_exit(void); /* constructors */ -int i915_global_gem_context_init(void); int i915_global_objects_init(void); int i915_global_request_init(void); int i915_global_scheduler_init(void); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 16e74f04a7c7..712af0f3a26c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -34,6 +34,7 @@ #include "i915_buddy.h" #include "i915_drv.h" #include "gem/i915_gem_context.h" +#include "gem/i915_gem_object.h" #include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1265,6 +1266,7 @@ static const struct { { i915_active_module_init, i915_active_module_exit }, { i915_buddy_module_init, i915_buddy_module_exit }, { i915_context_module_init, i915_context_module_exit }, + { i915_gem_context_module_init, i915_gem_context_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, -- cgit v1.2.3 From c8ad09affd27590fee8dbf6b24419abf9d9bcf3d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:32 +0200 Subject: drm/i915: move gem_objects slab to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_objects to just a slab_objects. v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-6-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 26 ++++++++------------------ drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 +++ drivers/gpu/drm/i915/i915_globals.c | 1 - drivers/gpu/drm/i915/i915_globals.h | 1 - drivers/gpu/drm/i915/i915_pci.c | 1 + 5 files changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 9d3497e1235a..6fb9afb65034 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -30,14 +30,10 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" -#include "i915_globals.h" #include "i915_memcpy.h" #include "i915_trace.h" -static struct i915_global_object { - struct i915_global base; - struct kmem_cache *slab_objects; -} global; +static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; @@ -45,7 +41,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; - obj = kmem_cache_zalloc(global.slab_objects, GFP_KERNEL); + obj = kmem_cache_zalloc(slab_objects, GFP_KERNEL); if (!obj) return NULL; obj->base.funcs = &i915_gem_object_funcs; @@ -55,7 +51,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) void i915_gem_object_free(struct drm_i915_gem_object *obj) { - return kmem_cache_free(global.slab_objects, obj); + return kmem_cache_free(slab_objects, obj); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -658,23 +654,17 @@ void i915_gem_init__objects(struct drm_i915_private *i915) INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); } -static void i915_global_objects_exit(void) +void i915_objects_module_exit(void) { - 
kmem_cache_destroy(global.slab_objects); + kmem_cache_destroy(slab_objects); } -static struct i915_global_object global = { { - .exit = i915_global_objects_exit, -} }; - -int __init i915_global_objects_init(void) +int __init i915_objects_module_init(void) { - global.slab_objects = - KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); - if (!global.slab_objects) + slab_objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); + if (!slab_objects) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 0896ac532f5e..48112b9d76df 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -48,6 +48,9 @@ static inline bool i915_gem_object_size_2big(u64 size) void i915_gem_init__objects(struct drm_i915_private *i915); +void i915_objects_module_exit(void); +int i915_objects_module_init(void); + struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index dbb3d81eeea7..40a592fbc3e0 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -30,7 +30,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_objects_init, i915_global_request_init, i915_global_scheduler_init, i915_global_vma_init, diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index f16752dbbdbf..9734740708f4 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -23,7 +23,6 @@ int i915_globals_init(void); void i915_globals_exit(void); /* constructors */ -int i915_global_objects_init(void); int i915_global_request_init(void); int i915_global_scheduler_init(void); int i915_global_vma_init(void); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 712af0f3a26c..337c30ff0d3c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1267,6 +1267,7 @@ static const struct { { i915_buddy_module_init, i915_buddy_module_exit }, { i915_context_module_init, i915_context_module_exit }, { i915_gem_context_module_init, i915_gem_context_module_exit }, + { i915_objects_module_init, i915_objects_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, -- cgit v1.2.3 From 47514ac752efdf4ff3ca1edeed8a811dc56a082c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:33 +0200 Subject: drm/i915: move request slabs to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_requests|execute_cbs to just a slab_requests|execute_cbs. 
v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-7-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_globals.c | 2 -- drivers/gpu/drm/i915/i915_pci.c | 2 ++ drivers/gpu/drm/i915/i915_request.c | 47 +++++++++++++++---------------------- drivers/gpu/drm/i915/i915_request.h | 3 +++ 4 files changed, 24 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 40a592fbc3e0..8fffa8d93bc5 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -8,7 +8,6 @@ #include #include "i915_globals.h" -#include "i915_request.h" #include "i915_scheduler.h" #include "i915_vma.h" @@ -30,7 +29,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_request_init, i915_global_scheduler_init, i915_global_vma_init, }; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 337c30ff0d3c..028a12cc8eb8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" +#include "i915_request.h" #include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1268,6 +1269,7 @@ static const struct { { i915_context_module_init, i915_context_module_exit }, { i915_gem_context_module_init, i915_gem_context_module_exit }, { i915_objects_module_init, i915_objects_module_exit }, + { i915_request_module_init, i915_request_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1f1d4a6a0eff..ce446716d092 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -42,7 +42,6 @@ #include "i915_active.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_trace.h" #include "intel_pm.h" @@ -52,11 +51,8 @@ struct execute_cb { struct i915_request *signal; }; -static struct i915_global_request { - struct i915_global base; - struct kmem_cache *slab_requests; - struct kmem_cache *slab_execute_cbs; -} global; +static struct kmem_cache *slab_requests; +static struct kmem_cache *slab_execute_cbs; static const char *i915_fence_get_driver_name(struct dma_fence *fence) { @@ -107,7 +103,7 @@ static signed long i915_fence_wait(struct dma_fence *fence, struct kmem_cache *i915_request_slab_cache(void) { - return global.slab_requests; + return slab_requests; } static void i915_fence_release(struct dma_fence *fence) @@ -140,7 +136,7 @@ static void i915_fence_release(struct dma_fence *fence) intel_context_put(rq->context); - kmem_cache_free(global.slab_requests, rq); + kmem_cache_free(slab_requests, rq); } const struct dma_fence_ops i915_fence_ops = { @@ -157,7 +153,7 @@ static void irq_execute_cb(struct irq_work *wrk) struct execute_cb *cb = container_of(wrk, typeof(*cb), work); i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); + kmem_cache_free(slab_execute_cbs, cb); } static __always_inline void @@ -462,7 +458,7 @@ __await_execution(struct i915_request *rq, if (i915_request_is_active(signal)) return 0; - cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); + cb = kmem_cache_alloc(slab_execute_cbs, gfp); if (!cb) return 
-ENOMEM; @@ -808,7 +804,7 @@ request_alloc_slow(struct intel_timeline *tl, rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); - rq = kmem_cache_alloc(global.slab_requests, + rq = kmem_cache_alloc(slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (rq) return rq; @@ -821,7 +817,7 @@ request_alloc_slow(struct intel_timeline *tl, retire_requests(tl); out: - return kmem_cache_alloc(global.slab_requests, gfp); + return kmem_cache_alloc(slab_requests, gfp); } static void __i915_request_ctor(void *arg) @@ -882,7 +878,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) * * Do not use kmem_cache_zalloc() here! */ - rq = kmem_cache_alloc(global.slab_requests, + rq = kmem_cache_alloc(slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); @@ -984,7 +980,7 @@ err_unwind: err_free: intel_context_put(ce); - kmem_cache_free(global.slab_requests, rq); + kmem_cache_free(slab_requests, rq); err_unreserve: intel_context_unpin(ce); return ERR_PTR(ret); @@ -2080,19 +2076,15 @@ enum i915_request_state i915_test_request_state(struct i915_request *rq) #include "selftests/i915_request.c" #endif -static void i915_global_request_exit(void) +void i915_request_module_exit(void) { - kmem_cache_destroy(global.slab_execute_cbs); - kmem_cache_destroy(global.slab_requests); + kmem_cache_destroy(slab_execute_cbs); + kmem_cache_destroy(slab_requests); } -static struct i915_global_request global = { { - .exit = i915_global_request_exit, -} }; - -int __init i915_global_request_init(void) +int __init i915_request_module_init(void) { - global.slab_requests = + slab_requests = kmem_cache_create("i915_request", sizeof(struct i915_request), __alignof__(struct i915_request), @@ -2100,20 +2092,19 @@ int __init i915_global_request_init(void) SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU, __i915_request_ctor); - if (!global.slab_requests) + if (!slab_requests) return -ENOMEM; - global.slab_execute_cbs = KMEM_CACHE(execute_cb, + slab_execute_cbs = KMEM_CACHE(execute_cb, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); - if (!global.slab_execute_cbs) + if (!slab_execute_cbs) goto err_requests; - i915_global_register(&global.base); return 0; err_requests: - kmem_cache_destroy(global.slab_requests); + kmem_cache_destroy(slab_requests); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index e6a0e0ebc9aa..1bc1349ba3c2 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -668,4 +668,7 @@ enum i915_request_state { enum i915_request_state i915_test_request_state(struct i915_request *rq); +void i915_request_module_exit(void); +int i915_request_module_init(void); + #endif /* I915_REQUEST_H */ -- cgit v1.2.3 From 210a0f5ce46080e6728e54010d50317fc9e1d969 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:34 +0200 Subject: drm/i915: move scheduler slabs to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_dependencies|priorities to just a slab_dependencies|priorities. 
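Where one file owns two caches, as the request and scheduler code does, the converted init also has to release the first cache when the second allocation fails. A rough sketch with hypothetical bar and baz caches (not the actual i915 functions):

	#include <linux/init.h>
	#include <linux/slab.h>

	struct bar { int dummy; };	/* placeholder payloads, assumptions only */
	struct baz { int dummy; };

	static struct kmem_cache *slab_bar;
	static struct kmem_cache *slab_baz;

	int __init i915_barbaz_module_init(void)
	{
		slab_bar = KMEM_CACHE(bar, SLAB_HWCACHE_ALIGN);
		if (!slab_bar)
			return -ENOMEM;

		slab_baz = KMEM_CACHE(baz, 0);
		if (!slab_baz)
			goto err_bar;	/* free the cache that did succeed */

		return 0;

	err_bar:
		kmem_cache_destroy(slab_bar);
		return -ENOMEM;
	}

	void i915_barbaz_module_exit(void)
	{
		kmem_cache_destroy(slab_baz);
		kmem_cache_destroy(slab_bar);
	}
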
v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-8-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_globals.c | 2 -- drivers/gpu/drm/i915/i915_globals.h | 2 -- drivers/gpu/drm/i915/i915_pci.c | 2 ++ drivers/gpu/drm/i915/i915_scheduler.c | 39 ++++++++++++++--------------------- drivers/gpu/drm/i915/i915_scheduler.h | 3 +++ 5 files changed, 20 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 8fffa8d93bc5..8923589057ab 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -8,7 +8,6 @@ #include #include "i915_globals.h" -#include "i915_scheduler.h" #include "i915_vma.h" static LIST_HEAD(globals); @@ -29,7 +28,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_scheduler_init, i915_global_vma_init, }; diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index 9734740708f4..7a57bce1da05 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -23,8 +23,6 @@ int i915_globals_init(void); void i915_globals_exit(void); /* constructors */ -int i915_global_request_init(void); -int i915_global_scheduler_init(void); int i915_global_vma_init(void); #endif /* _I915_GLOBALS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 028a12cc8eb8..7069c3b75ab1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -39,6 +39,7 @@ #include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" +#include "i915_scheduler.h" #define PLATFORM(x) .platform = (x) #define GEN(x) \ @@ -1270,6 +1271,7 @@ static const struct { { i915_gem_context_module_init, i915_gem_context_module_exit }, { i915_objects_module_init, i915_objects_module_exit }, { i915_request_module_init, i915_request_module_exit }, + { i915_scheduler_module_init, i915_scheduler_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 17843c204356..762127dd56c5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -7,15 +7,11 @@ #include #include "i915_drv.h" -#include "i915_globals.h" #include "i915_request.h" #include "i915_scheduler.h" -static struct i915_global_scheduler { - struct i915_global base; - struct kmem_cache *slab_dependencies; - struct kmem_cache *slab_priorities; -} global; +static struct kmem_cache *slab_dependencies; +static struct kmem_cache *slab_priorities; static DEFINE_SPINLOCK(schedule_lock); @@ -93,7 +89,7 @@ find_priolist: if (prio == I915_PRIORITY_NORMAL) { p = &sched_engine->default_priolist; } else { - p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); + p = kmem_cache_alloc(slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ if (unlikely(!p)) { prio = I915_PRIORITY_NORMAL; /* recurses just once */ @@ -122,7 +118,7 @@ find_priolist: void __i915_priolist_free(struct i915_priolist *p) { - kmem_cache_free(global.slab_priorities, p); + kmem_cache_free(slab_priorities, p); } struct sched_cache { @@ -320,13 +316,13 @@ void i915_sched_node_reinit(struct i915_sched_node *node) static struct i915_dependency * 
i915_dependency_alloc(void) { - return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); + return kmem_cache_alloc(slab_dependencies, GFP_KERNEL); } static void i915_dependency_free(struct i915_dependency *dep) { - kmem_cache_free(global.slab_dependencies, dep); + kmem_cache_free(slab_dependencies, dep); } bool __i915_sched_node_add_dependency(struct i915_sched_node *node, @@ -489,32 +485,27 @@ i915_sched_engine_create(unsigned int subclass) return sched_engine; } -static void i915_global_scheduler_exit(void) +void i915_scheduler_module_exit(void) { - kmem_cache_destroy(global.slab_dependencies); - kmem_cache_destroy(global.slab_priorities); + kmem_cache_destroy(slab_dependencies); + kmem_cache_destroy(slab_priorities); } -static struct i915_global_scheduler global = { { - .exit = i915_global_scheduler_exit, -} }; - -int __init i915_global_scheduler_init(void) +int __init i915_scheduler_module_init(void) { - global.slab_dependencies = KMEM_CACHE(i915_dependency, + slab_dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU); - if (!global.slab_dependencies) + if (!slab_dependencies) return -ENOMEM; - global.slab_priorities = KMEM_CACHE(i915_priolist, 0); - if (!global.slab_priorities) + slab_priorities = KMEM_CACHE(i915_priolist, 0); + if (!slab_priorities) goto err_priorities; - i915_global_register(&global.base); return 0; err_priorities: - kmem_cache_destroy(global.slab_priorities); + kmem_cache_destroy(slab_priorities); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index f4d9811ade5b..0b9b86af6c7f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -102,4 +102,7 @@ i915_sched_engine_disabled(struct i915_sched_engine *sched_engine) return sched_engine->disabled(sched_engine); } +void i915_scheduler_module_exit(void); +int i915_scheduler_module_init(void); + #endif /* _I915_SCHEDULER_H_ */ -- cgit v1.2.3 From 64fc7cc71c224cb028107130d7513d964f95ec45 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:35 +0200 Subject: drm/i915: move vma slab to direct module init/exit With the global kmem_cache shrink infrastructure gone there's nothing special and we can convert them over. I'm doing this split up into each patch because there's quite a bit of noise with removing the static global.slab_vmas to just a slab_vmas. We have to keep i915_drv.h include in i915_globals otherwise there's nothing anymore that pulls in GEM_BUG_ON. 
v2: Make slab static (Jason, 0day) Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-9-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_globals.c | 3 +-- drivers/gpu/drm/i915/i915_globals.h | 3 --- drivers/gpu/drm/i915/i915_pci.c | 2 ++ drivers/gpu/drm/i915/i915_vma.c | 25 ++++++++----------------- drivers/gpu/drm/i915/i915_vma.h | 3 +++ 5 files changed, 14 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 8923589057ab..04979789e7be 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -8,7 +8,7 @@ #include #include "i915_globals.h" -#include "i915_vma.h" +#include "i915_drv.h" static LIST_HEAD(globals); @@ -28,7 +28,6 @@ static void __i915_globals_cleanup(void) } static __initconst int (* const initfn[])(void) = { - i915_global_vma_init, }; int __init i915_globals_init(void) diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index 7a57bce1da05..57d2998bba45 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -22,7 +22,4 @@ void i915_global_register(struct i915_global *global); int i915_globals_init(void); void i915_globals_exit(void); -/* constructors */ -int i915_global_vma_init(void); - #endif /* _I915_GLOBALS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 7069c3b75ab1..11a220e6a34f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -40,6 +40,7 @@ #include "i915_globals.h" #include "i915_selftest.h" #include "i915_scheduler.h" +#include "i915_vma.h" #define PLATFORM(x) .platform = (x) #define GEN(x) \ @@ -1272,6 +1273,7 @@ static const struct { { i915_objects_module_init, i915_objects_module_exit }, { i915_request_module_init, i915_request_module_exit }, { i915_scheduler_module_init, i915_scheduler_module_exit }, + { i915_vma_module_init, i915_vma_module_exit }, { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 09a7c47926f7..4b7fc4647e46 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -34,24 +34,20 @@ #include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" -static struct i915_global_vma { - struct i915_global base; - struct kmem_cache *slab_vmas; -} global; +static struct kmem_cache *slab_vmas; struct i915_vma *i915_vma_alloc(void) { - return kmem_cache_zalloc(global.slab_vmas, GFP_KERNEL); + return kmem_cache_zalloc(slab_vmas, GFP_KERNEL); } void i915_vma_free(struct i915_vma *vma) { - return kmem_cache_free(global.slab_vmas, vma); + return kmem_cache_free(slab_vmas, vma); } #if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) @@ -1414,21 +1410,16 @@ void i915_vma_make_purgeable(struct i915_vma *vma) #include "selftests/i915_vma.c" #endif -static void i915_global_vma_exit(void) +void i915_vma_module_exit(void) { - kmem_cache_destroy(global.slab_vmas); + kmem_cache_destroy(slab_vmas); } -static struct i915_global_vma global = { { - .exit = i915_global_vma_exit, -} }; - -int __init i915_global_vma_init(void) +int __init i915_vma_module_init(void) { - global.slab_vmas = 
KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); - if (!global.slab_vmas) + slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!slab_vmas) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index eca452a9851f..ed69f66c7ab0 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -426,4 +426,7 @@ static inline int i915_vma_sync(struct i915_vma *vma) return i915_active_wait(&vma->active); } +void i915_vma_module_exit(void); +int i915_vma_module_init(void); + #endif -- cgit v1.2.3 From bb13ea2825356de476b142b8186ea595a722c2b1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:36 +0200 Subject: drm/i915: Remove i915_globals No longer used. Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-10-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/Makefile | 1 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 - drivers/gpu/drm/i915/i915_globals.c | 53 ----------------------------------- drivers/gpu/drm/i915/i915_globals.h | 25 ----------------- drivers/gpu/drm/i915/i915_pci.c | 2 -- 5 files changed, 82 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_globals.c delete mode 100644 drivers/gpu/drm/i915/i915_globals.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index ab7679957623..b73a0861b7ca 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -166,7 +166,6 @@ i915-y += \ i915_gem_gtt.o \ i915_gem_ww.o \ i915_gem.o \ - i915_globals.o \ i915_query.o \ i915_request.o \ i915_scheduler.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index cd7b96005d29..dea8e2479897 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -6,7 +6,6 @@ #include #include "i915_drv.h" -#include "i915_globals.h" #include "i915_params.h" #include "intel_context.h" #include "intel_engine_pm.h" diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c deleted file mode 100644 index 04979789e7be..000000000000 --- a/drivers/gpu/drm/i915/i915_globals.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include -#include - -#include "i915_globals.h" -#include "i915_drv.h" - -static LIST_HEAD(globals); - -void __init i915_global_register(struct i915_global *global) -{ - GEM_BUG_ON(!global->exit); - - list_add_tail(&global->link, &globals); -} - -static void __i915_globals_cleanup(void) -{ - struct i915_global *global, *next; - - list_for_each_entry_safe_reverse(global, next, &globals, link) - global->exit(); -} - -static __initconst int (* const initfn[])(void) = { -}; - -int __init i915_globals_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(initfn); i++) { - int err; - - err = initfn[i](); - if (err) { - __i915_globals_cleanup(); - return err; - } - } - - return 0; -} - -void i915_globals_exit(void) -{ - __i915_globals_cleanup(); -} diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h deleted file mode 100644 index 57d2998bba45..000000000000 --- a/drivers/gpu/drm/i915/i915_globals.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef _I915_GLOBALS_H_ -#define _I915_GLOBALS_H_ - -#include - -typedef void 
(*i915_global_func_t)(void); - -struct i915_global { - struct list_head link; - - i915_global_func_t exit; -}; - -void i915_global_register(struct i915_global *global); - -int i915_globals_init(void); -void i915_globals_exit(void); - -#endif /* _I915_GLOBALS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 11a220e6a34f..fbe7795a2027 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -37,7 +37,6 @@ #include "gem/i915_gem_object.h" #include "i915_request.h" #include "i915_perf.h" -#include "i915_globals.h" #include "i915_selftest.h" #include "i915_scheduler.h" #include "i915_vma.h" @@ -1274,7 +1273,6 @@ static const struct { { i915_request_module_init, i915_request_module_exit }, { i915_scheduler_module_init, i915_scheduler_module_exit }, { i915_vma_module_init, i915_vma_module_exit }, - { i915_globals_init, i915_globals_exit }, { i915_mock_selftests, NULL }, { i915_pmu_init, i915_pmu_exit }, { i915_register_pci_driver, i915_unregister_pci_driver }, -- cgit v1.2.3 From 708b7df3480aff61449e848876876b930ce93c4a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Jul 2021 14:10:37 +0200 Subject: drm/i915: Extract i915_module.c The module init code is somewhat misplaced in i915_pci.c, since it needs to pull in init/exit functions from every part of the driver and pollutes the include list a lot. Extract an i915_module.c file which pulls all the bits together, and allows us to massively trim the include list of i915_pci.c. The downside is that have to drop the error path check Jason added to catch when we set up the pci driver too early. I think that risk is acceptable for this pretty nice include. Reviewed-by: Jason Ekstrand Cc: Jason Ekstrand Cc: Tvrtko Ursulin Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210727121037.2041102-11-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_module.c | 113 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_pci.c | 117 +------------------------------------ drivers/gpu/drm/i915/i915_pci.h | 8 +++ 4 files changed, 125 insertions(+), 114 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_module.c create mode 100644 drivers/gpu/drm/i915/i915_pci.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b73a0861b7ca..a985c92c8c19 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_irq.o \ i915_getparam.o \ i915_mitigations.o \ + i915_module.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c new file mode 100644 index 000000000000..c578ea8f56a0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_module.c @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_object.h" +#include "i915_active.h" +#include "i915_buddy.h" +#include "i915_params.h" +#include "i915_pci.h" +#include "i915_perf.h" +#include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_selftest.h" +#include "i915_vma.h" + +static int i915_check_nomodeset(void) +{ + bool use_kms = true; + + /* + * Enable KMS by default, unless explicitly overriden by + * either the i915.modeset prarameter or by the + * vga_text_mode_force boot option. 
+ */ + + if (i915_modparams.modeset == 0) + use_kms = false; + + if (vgacon_text_force() && i915_modparams.modeset == -1) + use_kms = false; + + if (!use_kms) { + /* Silently fail loading to not upset userspace. */ + DRM_DEBUG_DRIVER("KMS disabled.\n"); + return 1; + } + + return 0; +} + +static const struct { + int (*init)(void); + void (*exit)(void); +} init_funcs[] = { + { i915_check_nomodeset, NULL }, + { i915_active_module_init, i915_active_module_exit }, + { i915_buddy_module_init, i915_buddy_module_exit }, + { i915_context_module_init, i915_context_module_exit }, + { i915_gem_context_module_init, i915_gem_context_module_exit }, + { i915_objects_module_init, i915_objects_module_exit }, + { i915_request_module_init, i915_request_module_exit }, + { i915_scheduler_module_init, i915_scheduler_module_exit }, + { i915_vma_module_init, i915_vma_module_exit }, + { i915_mock_selftests, NULL }, + { i915_pmu_init, i915_pmu_exit }, + { i915_register_pci_driver, i915_unregister_pci_driver }, + { i915_perf_sysctl_register, i915_perf_sysctl_unregister }, +}; +static int init_progress; + +static int __init i915_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err < 0) { + while (i--) { + if (init_funcs[i].exit) + init_funcs[i].exit(); + } + return err; + } else if (err > 0) { + /* + * Early-exit success is reserved for things which + * don't have an exit() function because we have no + * idea how far they got or how to partially tear + * them down. + */ + WARN_ON(init_funcs[i].exit); + break; + } + } + + init_progress = i; + + return 0; +} + +static void __exit i915_exit(void) +{ + int i; + + for (i = init_progress - 1; i >= 0; i--) { + GEM_BUG_ON(i >= ARRAY_SIZE(init_funcs)); + if (init_funcs[i].exit) + init_funcs[i].exit(); + } +} + +module_init(i915_init); +module_exit(i915_exit); + +MODULE_AUTHOR("Tungsten Graphics, Inc."); +MODULE_AUTHOR("Intel Corporation"); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index fbe7795a2027..b76c2ba8451c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -22,24 +22,13 @@ * */ -#include #include #include #include -#include "display/intel_fbdev.h" - -#include "i915_active.h" -#include "i915_buddy.h" #include "i915_drv.h" -#include "gem/i915_gem_context.h" -#include "gem/i915_gem_object.h" -#include "i915_request.h" -#include "i915_perf.h" -#include "i915_selftest.h" -#include "i915_scheduler.h" -#include "i915_vma.h" +#include "i915_pci.h" #define PLATFORM(x) .platform = (x) #define GEN(x) \ @@ -1216,31 +1205,6 @@ static void i915_pci_shutdown(struct pci_dev *pdev) i915_driver_shutdown(i915); } -static int i915_check_nomodeset(void) -{ - bool use_kms = true; - - /* - * Enable KMS by default, unless explicitly overriden by - * either the i915.modeset prarameter or by the - * vga_text_mode_force boot option. - */ - - if (i915_modparams.modeset == 0) - use_kms = false; - - if (vgacon_text_force() && i915_modparams.modeset == -1) - use_kms = false; - - if (!use_kms) { - /* Silently fail loading to not upset userspace. 
*/ - DRM_DEBUG_DRIVER("KMS disabled.\n"); - return 1; - } - - return 0; -} - static struct pci_driver i915_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, @@ -1250,87 +1214,12 @@ static struct pci_driver i915_pci_driver = { .driver.pm = &i915_pm_ops, }; -static int i915_register_pci_driver(void) +int i915_register_pci_driver(void) { return pci_register_driver(&i915_pci_driver); } -static void i915_unregister_pci_driver(void) +void i915_unregister_pci_driver(void) { pci_unregister_driver(&i915_pci_driver); } - -static const struct { - int (*init)(void); - void (*exit)(void); -} init_funcs[] = { - { i915_check_nomodeset, NULL }, - { i915_active_module_init, i915_active_module_exit }, - { i915_buddy_module_init, i915_buddy_module_exit }, - { i915_context_module_init, i915_context_module_exit }, - { i915_gem_context_module_init, i915_gem_context_module_exit }, - { i915_objects_module_init, i915_objects_module_exit }, - { i915_request_module_init, i915_request_module_exit }, - { i915_scheduler_module_init, i915_scheduler_module_exit }, - { i915_vma_module_init, i915_vma_module_exit }, - { i915_mock_selftests, NULL }, - { i915_pmu_init, i915_pmu_exit }, - { i915_register_pci_driver, i915_unregister_pci_driver }, - { i915_perf_sysctl_register, i915_perf_sysctl_unregister }, -}; -static int init_progress; - -static int __init i915_init(void) -{ - int err, i; - - for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = init_funcs[i].init(); - if (err < 0) { - while (i--) { - if (init_funcs[i].exit) - init_funcs[i].exit(); - } - return err; - } else if (err > 0) { - /* - * Early-exit success is reserved for things which - * don't have an exit() function because we have no - * idea how far they got or how to partially tear - * them down. - */ - WARN_ON(init_funcs[i].exit); - - /* - * We don't want to advertise devices with an only - * partially initialized driver. - */ - WARN_ON(i915_pci_driver.driver.owner); - break; - } - } - - init_progress = i; - - return 0; -} - -static void __exit i915_exit(void) -{ - int i; - - for (i = init_progress - 1; i >= 0; i--) { - GEM_BUG_ON(i >= ARRAY_SIZE(init_funcs)); - if (init_funcs[i].exit) - init_funcs[i].exit(); - } -} - -module_init(i915_init); -module_exit(i915_exit); - -MODULE_AUTHOR("Tungsten Graphics, Inc."); -MODULE_AUTHOR("Intel Corporation"); - -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/i915/i915_pci.h b/drivers/gpu/drm/i915/i915_pci.h new file mode 100644 index 000000000000..b386f319f52e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pci.h @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2021 Intel Corporation + */ + +int i915_register_pci_driver(void); +void i915_unregister_pci_driver(void); -- cgit v1.2.3 From 94fd8400c2a37e88f6f82ef1553bf6336b8569b1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 28 Jul 2021 15:03:23 -0700 Subject: drm/i915/gt: remove explicit CNL handling from intel_mocs.c Only one reference to CNL that is not needed, but code is the same for GEN9_BC, so leave the code around and just remove the special case for CNL. 
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210728220326.1578242-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 17848807f111..582c4423b95d 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -352,7 +352,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { + } else if (IS_GEN9_BC(i915)) { table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skl_mocs_table; -- cgit v1.2.3 From 91a197e4e140dcf2a525a43348b87378c9c3a234 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 28 Jul 2021 15:03:24 -0700 Subject: drm/i915/gt: remove explicit CNL handling from intel_sseu.c CNL is the only platform with GRAPHICS_VER == 10. With its removal we don't need to handle that version anymore. Also we can now reduce the max number of slices: the call to intel_sseu_set_info() with the highest number of slices comes from SKL and BDW with 3 slices. Recent platforms actually increase the number of subslices so the number of slices remain 1. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728220326.1578242-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 79 ------------------------------------ drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- 2 files changed, 1 insertion(+), 80 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 367fd44b81c8..9542c3f3822a 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -188,83 +188,6 @@ static void gen11_sseu_info_init(struct intel_gt *gt) sseu->has_eu_pg = 1; } -static void gen10_sseu_info_init(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - struct sseu_dev_info *sseu = >->info.sseu; - const u32 fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - const int eu_mask = 0xff; - u32 subslice_mask, eu_en; - int s, ss; - - intel_sseu_set_info(sseu, 6, 4, 8); - - sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> - GEN10_F2_S_ENA_SHIFT; - - /* Slice0 */ - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE0); - for (ss = 0; ss < sseu->max_subslices; ss++) - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); - /* Slice1 */ - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE1); - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); - /* Slice2 */ - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); - /* Slice3 */ - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE2); - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); - /* Slice4 */ - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); - /* Slice5 */ - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN10_EU_DISABLE3); - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); - - subslice_mask = (1 << 4) - 1; - subslice_mask &= ~((fuse2 & 
GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); - - for (s = 0; s < sseu->max_slices; s++) { - u32 subslice_mask_with_eus = subslice_mask; - - for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu_get_eus(sseu, s, ss) == 0) - subslice_mask_with_eus &= ~BIT(ss); - } - - /* - * Slice0 can have up to 3 subslices, but there are only 2 in - * slice1/2. - */ - intel_sseu_set_subslices(sseu, s, s == 0 ? - subslice_mask_with_eus : - subslice_mask_with_eus & 0x3); - } - - sseu->eu_total = compute_eu_total(sseu); - - /* - * CNL is expected to always have a uniform distribution - * of EU across subslices with the exception that any one - * EU in any one subslice may be fused off for die - * recovery. - */ - sseu->eu_per_subslice = - intel_sseu_subslice_total(sseu) ? - DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) : - 0; - - /* No restrictions on Power Gating */ - sseu->has_slice_pg = 1; - sseu->has_subslice_pg = 1; - sseu->has_eu_pg = 1; -} - static void cherryview_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; @@ -592,8 +515,6 @@ void intel_sseu_info_init(struct intel_gt *gt) bdw_sseu_info_init(gt); else if (GRAPHICS_VER(i915) == 9) gen9_sseu_info_init(gt); - else if (GRAPHICS_VER(i915) == 10) - gen10_sseu_info_init(gt); else if (GRAPHICS_VER(i915) == 11) gen11_sseu_info_init(gt); else if (GRAPHICS_VER(i915) >= 12) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4cd1a8a7298a..8d85ec05f610 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -15,7 +15,7 @@ struct drm_i915_private; struct intel_gt; struct drm_printer; -#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SLICES (3) /* SKL upper bound */ #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) -- cgit v1.2.3 From 701d31860d34302190bfb0fdbedc987977562961 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 28 Jul 2021 15:03:25 -0700 Subject: drm/i915/gt: rename CNL references in intel_engine.h With the removal of CNL, let's consider ICL as the first platform using that index. 
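(Not part of the patch, purely an illustrative sketch of how the renamed constant is consumed.) The CSB write pointer lives in the engine's hardware status page at a platform-dependent dword offset, and intel_hws_csb_write_index() selects that offset, so a reader looks roughly like the following, assuming the existing intel_read_status_page() helper from the same header:

static inline u32 csb_write_ptr(struct intel_engine_cs *engine)
{
	/* dword 0x1f on legacy platforms, 0x2f (ICL_HWS_CSB_WRITE_INDEX) on gen11+ */
	return intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915));
}

The rename is purely cosmetic: the 0x2f offset itself is unchanged, only the prefix now names the first platform that actually uses it.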
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210728220326.1578242-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c2a5640ae055..87579affb952 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -179,7 +179,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f -#define CNL_HWS_CSB_WRITE_INDEX 0x2f +#define ICL_HWS_CSB_WRITE_INDEX 0x2f void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d22cea642627..65000b57ddb6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1958,8 +1958,8 @@ int remap_io_sg(struct vm_area_struct *vma, static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { - if (GRAPHICS_VER(i915) >= 10) - return CNL_HWS_CSB_WRITE_INDEX; + if (GRAPHICS_VER(i915) >= 11) + return ICL_HWS_CSB_WRITE_INDEX; else return I915_HWS_CSB_WRITE_INDEX; } -- cgit v1.2.3 From 6266992cf1052432bcb592dd6f22f9be7960ddb0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 28 Jul 2021 15:03:26 -0700 Subject: drm/i915/gt: remove GRAPHICS_VER == 10 Replace all remaining handling of GRAPHICS_VER {==,>=} 10 with {==,>=} 11. With the removal of CNL, there is no platform with graphics version equals 10. Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210728220326.1578242-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 10 +++--- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 -- drivers/gpu/drm/i915/gt/intel_ggtt.c | 4 +-- drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 10 +++--- drivers/gpu/drm/i915/gt/intel_gtt.c | 6 ++-- drivers/gpu/drm/i915/gt/intel_lrc.c | 42 +------------------------- drivers/gpu/drm/i915/gt/intel_rc6.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 4 +-- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 6 ++-- 9 files changed, 22 insertions(+), 65 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index 4270b5a34a83..d6f5836396f8 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -437,20 +437,20 @@ static int frequency_show(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? 
GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -500,7 +500,7 @@ static int llc_show(struct seq_file *m, void *data) min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq /= GEN9_FREQ_SCALER; max_gpu_freq /= GEN9_FREQ_SCALER; @@ -518,7 +518,7 @@ static int llc_show(struct seq_file *m, void *data) intel_gpu_freq(rps, (gpu_freq * (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index dea0e522c5c7..0d9105a31d84 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -35,7 +35,6 @@ #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) @@ -186,8 +185,6 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) case 12: case 11: return GEN11_LR_CONTEXT_RENDER_SIZE; - case 10: - return GEN10_LR_CONTEXT_RENDER_SIZE; case 9: return GEN9_LR_CONTEXT_RENDER_SIZE; case 8: diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 9d445ad9a342..de3ac58fceec 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -826,13 +826,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* - * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range * will be dropped. For WC mappings in general we have 64 byte burst * writes when the WC buffer is flushed, so we can't use it, but have to * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. 
*/ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 10) + if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) ggtt->gsm = ioremap(phys_addr, size); else ggtt->gsm = ioremap_wc(phys_addr, size); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 9f0e729d2d15..3513d6f90747 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -24,8 +24,8 @@ static u32 read_reference_ts_freq(struct intel_uncore *uncore) return base_freq + frac_freq; } -static u32 gen10_get_crystal_clock_freq(struct intel_uncore *uncore, - u32 rpm_config_reg) +static u32 gen9_get_crystal_clock_freq(struct intel_uncore *uncore, + u32 rpm_config_reg) { u32 f19_2_mhz = 19200000; u32 f24_mhz = 24000000; @@ -128,10 +128,10 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) } else { u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); - if (GRAPHICS_VER(uncore->i915) <= 10) - freq = gen10_get_crystal_clock_freq(uncore, c0); - else + if (GRAPHICS_VER(uncore->i915) >= 11) freq = gen11_get_crystal_clock_freq(uncore, c0); + else + freq = gen9_get_crystal_clock_freq(uncore, c0); /* * Now figure out how the command stream's timestamp diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index f7e0352edb62..e137dd32b5b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -426,7 +426,7 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } -static void cnl_setup_private_ppat(struct intel_uncore *uncore) +static void icl_setup_private_ppat(struct intel_uncore *uncore) { intel_uncore_write(uncore, GEN10_PAT_INDEX(0), @@ -526,8 +526,8 @@ void setup_private_pat(struct intel_uncore *uncore) if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); - else if (GRAPHICS_VER(i915) >= 10) - cnl_setup_private_ppat(uncore); + else if (GRAPHICS_VER(i915) >= 11) + icl_setup_private_ppat(uncore); else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) chv_setup_private_ppat(uncore); else diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c3f5bec8ae15..bb4af4977920 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -70,7 +70,7 @@ static void set_offsets(u32 *regs, if (close) { /* Close the batch; used mainly by live_lrc_layout() */ *regs = MI_BATCH_BUFFER_END; - if (GRAPHICS_VER(engine->i915) >= 10) + if (GRAPHICS_VER(engine->i915) >= 11) *regs |= BIT(0); } } @@ -653,8 +653,6 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 11: return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 10: - return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 9: return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 8: @@ -1448,40 +1446,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 * -gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - int i; - - /* - * WaPipeControlBefore3DStateSamplePattern: cnl - * - * Ensure the engine is idle prior to programming a - * 3DSTATE_SAMPLE_PATTERN during a context restore. - */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL, - 0); - /* - * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for - * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in - * total. 
However, a PIPE_CONTROL is 6 dwords long, not 4, which is - * confusing. Since gen8_emit_pipe_control() already advances the - * batch by 6 dwords, we advance the other 10 here, completing a - * cacheline. It's not clear if the workaround requires this padding - * before other commands, or if it's just the regular padding we would - * already have for the workaround bb, so leave it here for now. - */ - for (i = 0; i < 10; i++) - *batch++ = MI_NOOP; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - #define CTX_WA_BB_SIZE (PAGE_SIZE) static int lrc_create_wa_ctx(struct intel_engine_cs *engine) @@ -1534,10 +1498,6 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) case 12: case 11: return; - case 10: - wa_bb_fn[0] = gen10_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; case 9: wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 259d7eb4e165..a7d13fe35b2e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -126,7 +126,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) enum intel_engine_id id; /* 2b: Program RC6 thresholds.*/ - if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 10) { + if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) { set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0c8e7f2b06f0..bc0f7d8baa84 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -999,7 +999,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(i915) || IS_BROADWELL(i915) || - IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { u32 ddcc_status = 0; if (sandybridge_pcode_read(i915, @@ -1012,7 +1012,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->max_freq); } - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 714fe8495775..5e7b09c5e36f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -50,7 +50,7 @@ static void cherryview_sseu_device_status(struct intel_gt *gt, #undef SS_MAX } -static void gen10_sseu_device_status(struct intel_gt *gt, +static void gen11_sseu_device_status(struct intel_gt *gt, struct sseu_dev_info *sseu) { #define SS_MAX 6 @@ -267,8 +267,8 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) bdw_sseu_device_status(gt, &sseu); else if (GRAPHICS_VER(i915) == 9) gen9_sseu_device_status(gt, &sseu); - else if (GRAPHICS_VER(i915) >= 10) - gen10_sseu_device_status(gt, &sseu); + else if (GRAPHICS_VER(i915) >= 11) + gen11_sseu_device_status(gt, &sseu); } i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu); -- cgit v1.2.3 From 6d0e4f077c895549bc02b2b3db2d66aae6e865a3 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 29 Jul 2021 10:47:31 +0100 Subject: drm/i915/selftests: prefer the create_user helper No need to hand roll the set_placements stuff, now that we have a helper for this. 
v2: add back the -ENODEV checking since it's possible for stolen to be probed, and yet still be non-functional Signed-off-by: Matthew Auld Cc: Jason Ekstrand Reviewed-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210729094731.1953091-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 34 +++------------------- 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 0b2b73d8a364..b20f5621f62b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -860,24 +860,6 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) return !no_map; } -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) -{ - GEM_BUG_ON(!n_placements); - - if (n_placements == 1) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mr = placements[0]; - - obj->mm.placements = &i915->mm.regions[mr->id]; - obj->mm.n_placements = 1; - } else { - obj->mm.placements = placements; - obj->mm.n_placements = n_placements; - } -} - #define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) static int __igt_mmap(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, @@ -972,15 +954,13 @@ static int igt_mmap(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, sizes[i], 0, I915_BO_ALLOC_USER); + obj = __i915_gem_object_create_user(i915, sizes[i], &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); @@ -1101,15 +1081,13 @@ static int igt_mmap_access(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); @@ -1248,15 +1226,13 @@ static int igt_mmap_gpu(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); @@ -1405,15 +1381,13 @@ static int igt_mmap_revoke(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0, I915_BO_ALLOC_USER); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); -- cgit v1.2.3 From 3989de0ef562a9168782258f3c6d2f517d82bbed Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 30 Jul 2021 12:11:15 -0700 Subject: 
drm/i915/xehp: Fix missing sentinel on mcr_ranges_xehp There's a missing sentinel since we are not using ARRAY_SIZE(), but rather checking that the .start is 0 to stop the iteration in mcr_range(). BUG: KASAN: global-out-of-bounds in mcr_range.isra.0+0x69/0xa0 [i915] Read of size 4 at addr ffffffffa0889928 by task modprobe/3881 Fixes: d8905ba705ab ("drm/i915/xehp: Define multicast register ranges") Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210730191115.2514581-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 9173df59821a..053fa7251cd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2000,6 +2000,7 @@ static const struct mcr_range mcr_ranges_xehp[] = { { .start = 0xdc00, .end = 0xffff }, { .start = 0x17000, .end = 0x17fff }, { .start = 0x24a00, .end = 0x24a7f }, + {}, }; static bool mcr_range(struct drm_i915_private *i915, u32 offset) -- cgit v1.2.3 From dff0fc4990929858eccab824bd310e7fb4bb20ee Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:06 -0700 Subject: drm/i915/guc/slpc: Initial definitions for SLPC Add macros to check for SLPC support. This feature is currently supported for Gen12+ and enabled whenever GuC submission is enabled/selected. Include templates for SLPC init/fini and enable. v2: Move SLPC helper functions to intel_guc_slpc.c/.h. Define basic template for SLPC structure in intel_guc_slpc_types.h. Fix copyright (Michal W) v3: Review comments (Michal W) v4: Include supported/selected inside slpc struct (Michal W) Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-2-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 45 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 33 +++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 16 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 ++- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 + 8 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a985c92c8c19..134b0edf4a2e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 979128e28372..39bc3c16057b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -7,6 +7,7 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" +#include "intel_guc_slpc.h" #include 
"intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -157,6 +158,7 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_ct_init_early(&guc->ct); intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); + intel_guc_slpc_init_early(&guc->slpc); mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index a9547069ee7e..7da11a0b6059 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -15,6 +15,7 @@ #include "intel_guc_ct.h" #include "intel_guc_log.h" #include "intel_guc_reg.h" +#include "intel_guc_slpc_types.h" #include "intel_uc_fw.h" #include "i915_utils.h" #include "i915_vma.h" @@ -30,6 +31,7 @@ struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; struct intel_guc_ct ct; + struct intel_guc_slpc slpc; /* Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c new file mode 100644 index 000000000000..40950f1bf05c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_guc_slpc.h" +#include "gt/intel_gt.h" + +static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) +{ + return container_of(slpc, struct intel_guc, slpc); +} + +static bool __detect_slpc_supported(struct intel_guc *guc) +{ + /* GuC SLPC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_slpc_selected(struct intel_guc *guc) +{ + if (!intel_guc_slpc_is_supported(guc)) + return false; + + return guc->submission_selected; +} + +void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + slpc->supported = __detect_slpc_supported(guc); + slpc->selected = __guc_slpc_selected(guc); +} + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc) +{ + return 0; +} + +void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) +{ +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h new file mode 100644 index 000000000000..bc139682ad0f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_SLPC_H_ +#define _INTEL_GUC_SLPC_H_ + +#include "intel_guc_submission.h" +#include "intel_guc_slpc_types.h" + +static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) +{ + return guc->slpc.supported; +} + +static inline bool intel_guc_slpc_is_wanted(struct intel_guc *guc) +{ + return guc->slpc.selected; +} + +static inline bool intel_guc_slpc_is_used(struct intel_guc *guc) +{ + return intel_guc_submission_is_used(guc) && intel_guc_slpc_is_wanted(guc); +} + +void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc); + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc); +int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); +void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h new file mode 100644 index 000000000000..769c162305a0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ 
-0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_SLPC_TYPES_H_ +#define _INTEL_GUC_SLPC_TYPES_H_ + +#include + +struct intel_guc_slpc { + bool supported; + bool selected; +}; + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index da57d18d9f6b..e6bd9406c7b2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -75,16 +75,18 @@ static void __confirm_options(struct intel_uc *uc) struct drm_i915_private *i915 = uc_to_gt(uc)->i915; drm_dbg(&i915->drm, - "enable_guc=%d (guc:%s submission:%s huc:%s)\n", + "enable_guc=%d (guc:%s submission:%s huc:%s slpc:%s)\n", i915->params.enable_guc, yesno(intel_uc_wants_guc(uc)), yesno(intel_uc_wants_guc_submission(uc)), - yesno(intel_uc_wants_huc(uc))); + yesno(intel_uc_wants_huc(uc)), + yesno(intel_uc_wants_guc_slpc(uc))); if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); GEM_BUG_ON(intel_uc_wants_huc(uc)); + GEM_BUG_ON(intel_uc_wants_guc_slpc(uc)); return; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index e2da2b6e76e1..925a58ca6b94 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -8,6 +8,7 @@ #include "intel_guc.h" #include "intel_guc_submission.h" +#include "intel_guc_slpc.h" #include "intel_huc.h" #include "i915_params.h" @@ -83,6 +84,7 @@ __uc_state_checker(x, func, uses, used) uc_state_checkers(guc, guc); uc_state_checkers(huc, huc); uc_state_checkers(guc, guc_submission); +uc_state_checkers(guc, guc_slpc); #undef uc_state_checkers #undef __uc_state_checker -- cgit v1.2.3 From 7ba79a671568b8d9d014f7a4c6fc8dfeb70c0d2d Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:07 -0700 Subject: drm/i915/guc/slpc: Gate Host RPS when SLPC is enabled Also ensure uc_init is called before we initialize RPS so that we can check for SLPC support. We do not need to enable up/down interrupts when SLPC is enabled. However, we still need the ARAT interrupt, which will be enabled separately later. 
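The SLPC check used by the new rps_uses_slpc() helper below is intel_uc_uses_guc_slpc(), generated by the uc_state_checkers(guc, guc_slpc) line added in the previous patch. A rough hand-expansion, illustrative only and assuming the existing __uc_state_checker pattern, is:

static inline bool intel_uc_wants_guc_slpc(struct intel_uc *uc)
{
	return intel_guc_slpc_is_wanted(&uc->guc);
}

static inline bool intel_uc_uses_guc_slpc(struct intel_uc *uc)
{
	return intel_guc_slpc_is_used(&uc->guc);
}

So rps_uses_slpc() ultimately reduces to "GuC submission is in use and guc->slpc.selected is set", which is why the patch moves intel_uc_init_early() ahead of intel_rps_init_early().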
v2: Explicitly return from intel_rps_enable with slpc check (Matthew B) Reviewed-by: Matthew Brost Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-3-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a64aa43f7cd9..04dd69bcf6cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -41,8 +41,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_timelines(gt); intel_gt_pm_init_early(gt); - intel_rps_init_early(>->rps); intel_uc_init_early(>->uc); + intel_rps_init_early(>->rps); } int intel_gt_probe_lmem(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index bc0f7d8baa84..cdfce5b8d861 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static bool rps_uses_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return intel_uc_uses_guc_slpc(>->uc); +} + static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) { return mask & ~rps->pm_intrmsk_mbz; @@ -167,6 +174,8 @@ static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); + GEM_BUG_ON(rps_uses_slpc(rps)); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", rps->pm_events, rps_pm_mask(rps, rps->last_freq)); @@ -771,6 +780,8 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) struct drm_i915_private *i915 = rps_to_i915(rps); u32 swreq; + GEM_BUG_ON(rps_uses_slpc(rps)); + if (GRAPHICS_VER(i915) >= 9) swreq = GEN9_FREQUENCY(val); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) @@ -861,6 +872,9 @@ void intel_rps_park(struct intel_rps *rps) { int adj; + if (!intel_rps_is_enabled(rps)) + return; + GEM_BUG_ON(atomic_read(&rps->num_waiters)); if (!intel_rps_clear_active(rps)) @@ -1356,6 +1370,9 @@ void intel_rps_enable(struct intel_rps *rps) if (!HAS_RPS(i915)) return; + if (rps_uses_slpc(rps)) + return; + intel_gt_check_clock_frequency(rps_to_gt(rps)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); @@ -1829,6 +1846,9 @@ void intel_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + if (rps_uses_slpc(rps)) + return; + if (IS_CHERRYVIEW(i915)) chv_rps_init(rps); else if (IS_VALLEYVIEW(i915)) @@ -1885,6 +1905,9 @@ void intel_rps_init(struct intel_rps *rps) void intel_rps_sanitize(struct intel_rps *rps) { + if (rps_uses_slpc(rps)) + return; + if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } -- cgit v1.2.3 From 7695d08f1e30f361502e9c19b6d330234d4ca755 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:08 -0700 Subject: drm/i915/guc/slpc: Adding SLPC communication interfaces Add constants and params that are needed to configure SLPC. v2: Add a new abi header for SLPC. Replace bitfields with genmasks. Address other comments from Michal W. 
v3: Add slpc H2G format in abi, other review commments (Michal W) v4: Update status bits according to latest spec v5: checkpatch() Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-4-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 - .../gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 235 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 3 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 7 + 4 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index d832c8f11c11..ca538e5de940 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -135,7 +135,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, - INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h new file mode 100644 index 000000000000..7a8d4bfc5f6a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include +#include "i915_reg.h" + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * +----+------+--------------------------------------------------------------+ + * | CL | Bytes| Description | + * +====+======+==============================================================+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +------+--------------------------------------------------------------+ + * | | 4-7 | GLOBAL STATE | + * | +------+--------------------------------------------------------------+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +------+--------------------------------------------------------------+ + * | | 12:63| PADDING | + * +----+------+--------------------------------------------------------------+ + * | | 0:63 | PADDING(PLATFORM INFO) | + * +----+------+--------------------------------------------------------------+ + * | 3 | 0-3 | TASK STATE DATA | + * + +------+--------------------------------------------------------------+ + * | | 4:63 | PADDING | + * +----+------+--------------------------------------------------------------+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * +----+------+--------------------------------------------------------------+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +----+------+--------------------------------------------------------------+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. 
The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. + * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size aligned (Total size needed for + * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \ + SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \ + (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \ + + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \ + + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \ + + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \ + + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE)) + +enum slpc_task_enable { + SLPC_PARAM_TASK_DEFAULT = 0, + SLPC_PARAM_TASK_ENABLED, + SLPC_PARAM_TASK_DISABLED, + SLPC_PARAM_TASK_UNKNOWN +}; + +enum slpc_global_state { + SLPC_GLOBAL_STATE_NOT_RUNNING = 0, + SLPC_GLOBAL_STATE_INITIALIZING = 1, + SLPC_GLOBAL_STATE_RESETTING = 2, + SLPC_GLOBAL_STATE_RUNNING = 3, + SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4, + SLPC_GLOBAL_STATE_ERROR = 5 +}; + +enum slpc_param_id { + SLPC_PARAM_TASK_ENABLE_GTPERF = 0, + SLPC_PARAM_TASK_DISABLE_GTPERF = 1, + SLPC_PARAM_TASK_ENABLE_BALANCER = 2, + SLPC_PARAM_TASK_DISABLE_BALANCER = 3, + SLPC_PARAM_TASK_ENABLE_DCC = 4, + SLPC_PARAM_TASK_DISABLE_DCC = 5, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7, + SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8, + SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9, + SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10, + SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11, + SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12, + SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13, + SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14, + SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15, + SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16, + SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17, + SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18, + SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19, + SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20, + SLPC_PARAM_PWRGATE_RC_MODE = 21, + SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22, + SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23, + SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24, + SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25, + SLPC_PARAM_STRATEGIES = 26, + SLPC_PARAM_POWER_PROFILE = 27, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28, + SLPC_MAX_PARAM = 32, +}; + +enum slpc_event_id { + SLPC_EVENT_RESET = 0, + SLPC_EVENT_SHUTDOWN = 1, + SLPC_EVENT_PLATFORM_INFO_CHANGE = 2, + SLPC_EVENT_DISPLAY_MODE_CHANGE = 3, + SLPC_EVENT_FLIP_COMPLETE = 4, + SLPC_EVENT_QUERY_TASK_STATE = 5, + SLPC_EVENT_PARAMETER_SET = 6, + SLPC_EVENT_PARAMETER_UNSET = 7, +}; + +struct slpc_task_state_data { + union { + u32 task_status_padding; + struct { + u32 status; +#define 
SLPC_GTPERF_TASK_ENABLED REG_BIT(0) +#define SLPC_DCC_TASK_ENABLED REG_BIT(11) +#define SLPC_IN_DCC REG_BIT(12) +#define SLPC_BALANCER_ENABLED REG_BIT(15) +#define SLPC_IBC_TASK_ENABLED REG_BIT(16) +#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17) +#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18) + }; + }; + union { + u32 freq_padding; + struct { +#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0) +#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8) +#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16) +#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24) + u32 freq; + }; + }; +} __packed; + +struct slpc_shared_data_header { + /* Total size in bytes of this shared buffer. */ + u32 size; + u32 global_state; + u32 display_data_addr; +} __packed; + +struct slpc_override_params { + u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE]; + u32 values[SLPC_MAX_OVERRIDE_PARAMETERS]; +} __packed; + +struct slpc_shared_data { + struct slpc_shared_data_header header; + u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER - + sizeof(struct slpc_shared_data_header)]; + + u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; + + struct slpc_task_state_data task_state_data; + u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE - + sizeof(struct slpc_task_state_data)]; + + struct slpc_override_params override_params; + u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES - + sizeof(struct slpc_override_params)]; + + u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; + + /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */ + u8 reserved_mode_definition[4096]; +} __packed; + +/** + * DOC: SLPC H2G MESSAGE FORMAT + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:8 | **EVENT_ID** | + * + +-------+--------------------------------------------------------------+ + * | | 7:0 | **EVENT_ARGC** - number of data arguments | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **EVENT_DATA1** | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | ... 
| + * +---+-------+--------------------------------------------------------------+ + * |2+n| 31:0 | **EVENT_DATAn** | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 + +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 39bc3c16057b..5b0f8c541b69 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -208,6 +208,9 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) if (!intel_guc_submission_is_used(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; + if (intel_guc_slpc_is_used(guc)) + flags |= GUC_CTL_ENABLE_SLPC; + return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 82534259b7ad..fa4be13c8854 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -12,6 +12,7 @@ #include "gt/intel_engine_types.h" #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" #include "abi/guc_errors_abi.h" #include "abi/guc_communication_mmio_abi.h" #include "abi/guc_communication_ctb_abi.h" @@ -95,6 +96,7 @@ #define GUC_CTL_WA 1 #define GUC_CTL_FEATURE 2 #define GUC_CTL_DISABLE_SCHEDULER (1 << 14) +#define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY_SHIFT 0 @@ -141,6 +143,11 @@ #define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \ (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT) +#define SLPC_EVENT(id, c) (\ +FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ +FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \ +) + static inline u8 engine_class_to_guc_class(u8 class) { BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS); -- cgit v1.2.3 From 869cd27ece296be710d2039ba930abea3185268f Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:09 -0700 Subject: drm/i915/guc/slpc: Allocate, initialize and release SLPC Allocate data structures for SLPC and functions for initializing on host side. 
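One illustrative aside on the allocation added below (not part of the patch): the ABI header introduced earlier pads struct slpc_shared_data out to exactly SLPC_SHARED_DATA_SIZE_BYTE_MAX, i.e. two 4K pages, so PAGE_ALIGN(sizeof(struct slpc_shared_data)) maps a whole number of pages with no slack. A compile-time check of that invariant would look roughly like:

/* illustrative only: the shared-data layout is padded to exactly two pages */
BUILD_BUG_ON(sizeof(struct slpc_shared_data) != SLPC_SHARED_DATA_SIZE_BYTE_MAX);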
v2: Address review comments (Michal W) v3: Remove unnecessary header includes (Michal W) v4: Rebase v5: Move allocation of shared data into slpc_init() (Michal W) Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-5-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 11 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 31 ++++++++++++++++++++++- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 2 ++ 3 files changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5b0f8c541b69..13d162353b1a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -336,6 +336,12 @@ int intel_guc_init(struct intel_guc *guc) goto err_ct; } + if (intel_guc_slpc_is_used(guc)) { + ret = intel_guc_slpc_init(&guc->slpc); + if (ret) + goto err_submission; + } + /* now that everything is perma-pinned, initialize the parameters */ guc_init_params(guc); @@ -346,6 +352,8 @@ int intel_guc_init(struct intel_guc *guc) return 0; +err_submission: + intel_guc_submission_fini(guc); err_ct: intel_guc_ct_fini(&guc->ct); err_ads: @@ -368,6 +376,9 @@ void intel_guc_fini(struct intel_guc *guc) i915_ggtt_disable_guc(gt->ggtt); + if (intel_guc_slpc_is_used(guc)) + intel_guc_slpc_fini(&guc->slpc); + if (intel_guc_submission_is_used(guc)) intel_guc_submission_fini(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 40950f1bf05c..377e09187e9f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -12,6 +12,16 @@ static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) return container_of(slpc, struct intel_guc, slpc); } +static inline struct intel_gt *slpc_to_gt(struct intel_guc_slpc *slpc) +{ + return guc_to_gt(slpc_to_guc(slpc)); +} + +static inline struct drm_i915_private *slpc_to_i915(struct intel_guc_slpc *slpc) +{ + return slpc_to_gt(slpc)->i915; +} + static bool __detect_slpc_supported(struct intel_guc *guc) { /* GuC SLPC is unavailable for pre-Gen12 */ @@ -37,9 +47,28 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) int intel_guc_slpc_init(struct intel_guc_slpc *slpc) { - return 0; + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + GEM_BUG_ON(slpc->vma); + + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); + if (unlikely(err)) { + drm_err(&i915->drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + return err; } void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) { + if (!slpc->vma) + return; + + i915_vma_unpin_and_release(&slpc->vma, I915_VMA_RELEASE_MAP); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 769c162305a0..8bd753167234 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -9,6 +9,8 @@ #include struct intel_guc_slpc { + struct i915_vma *vma; + struct slpc_shared_data *vaddr; bool supported; bool selected; }; -- cgit v1.2.3 From 63c0eb30bfe9269d79f0cb9cbe6ff86b14928a19 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar 
Date: Fri, 30 Jul 2021 13:21:10 -0700 Subject: drm/i915/guc/slpc: Enable SLPC and add related H2G events Add methods for interacting with GuC for enabling SLPC. Enable SLPC after GuC submission has been established. GuC load will fail if SLPC cannot be successfully initialized. Add various helper methods to set/unset the parameters for SLPC. They can be set using H2G calls or directly setting bits in the shared data structure. v2: Address several review comments, add new helpers for decoding the SLPC min/max frequencies. Use masks instead of hardcoded constants. (Michal W) v3: Split global_state_to_string function, and check for positive non-zero return value from intel_guc_send() (Michal W) v4: Optimize the stringify function and other comments (Michal W) v5: Enable slpc as well before declaring GuC submission status (Michal W) v6: Checkpatch() Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-6-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 202 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 11 ++ 3 files changed, 215 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 377e09187e9f..f26cf07ec87b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -45,6 +45,40 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) slpc->selected = __guc_slpc_selected(guc); } +static void slpc_mem_set_param(struct slpc_shared_data *data, + u32 id, u32 value) +{ + GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS); + /* + * When the flag bit is set, corresponding value will be read + * and applied by SLPC. + */ + data->override_params.bits[id >> 5] |= (1 << (id % 32)); + data->override_params.values[id] = value; +} + +static void slpc_mem_set_enabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Enabling a param involves setting the enable_id + * to 1 and disable_id to 0. + */ + slpc_mem_set_param(data, enable_id, 1); + slpc_mem_set_param(data, disable_id, 0); +} + +static void slpc_mem_set_disabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Disabling a param involves setting the enable_id + * to 0 and disable_id to 1. + */ + slpc_mem_set_param(data, disable_id, 1); + slpc_mem_set_param(data, enable_id, 0); +} + int intel_guc_slpc_init(struct intel_guc_slpc *slpc) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -65,6 +99,174 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) return err; } +static u32 slpc_get_state(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data; + + GEM_BUG_ON(!slpc->vma); + + drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); + data = slpc->vaddr; + + return data->header.global_state; +} + +static bool slpc_is_running(struct intel_guc_slpc *slpc) +{ + return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; +} + +static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int slpc_query_task_state(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_query(guc, offset); + if (unlikely(ret)) + drm_err(&i915->drm, "Failed to query task state (%pe)\n", + ERR_PTR(ret)); + + drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); + + return ret; +} + +static const char *slpc_global_state_to_string(enum slpc_global_state state) +{ + switch (state) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + return "not running"; + case SLPC_GLOBAL_STATE_INITIALIZING: + return "initializing"; + case SLPC_GLOBAL_STATE_RESETTING: + return "resetting"; + case SLPC_GLOBAL_STATE_RUNNING: + return "running"; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + return "shutting down"; + case SLPC_GLOBAL_STATE_ERROR: + return "error"; + default: + return "unknown"; + } +} + +static const char *slpc_get_state_string(struct intel_guc_slpc *slpc) +{ + return slpc_global_state_to_string(slpc_get_state(slpc)); +} + +static int guc_action_slpc_reset(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_RESET, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_reset(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct intel_guc *guc = slpc_to_guc(slpc); + u32 offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_reset(guc, offset); + + if (unlikely(ret < 0)) { + drm_err(&i915->drm, "SLPC reset action failed (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + if (!ret) { + if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) { + drm_err(&i915->drm, "SLPC not enabled! State = %s\n", + slpc_get_state_string(slpc)); + return -EIO; + } + } + + return 0; +} + +static void slpc_shared_data_reset(struct slpc_shared_data *data) +{ + memset(data, 0, sizeof(struct slpc_shared_data)); + + data->header.size = sizeof(struct slpc_shared_data); + + /* Enable only GTPERF task, disable others */ + slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF, + SLPC_PARAM_TASK_DISABLE_GTPERF); + + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, + SLPC_PARAM_TASK_DISABLE_BALANCER); + + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, + SLPC_PARAM_TASK_DISABLE_DCC); +} + +/* + * intel_guc_slpc_enable() - Start SLPC + * @slpc: pointer to intel_guc_slpc. + * + * SLPC is enabled by setting up the shared data structure and + * sending reset event to GuC SLPC. Initial data is setup in + * intel_guc_slpc_init. Here we send the reset event. We do + * not currently need a slpc_disable since this is taken care + * of automatically when a reset/suspend occurs and the GuC + * CTB is destroyed. + * + * Return: 0 on success, non-zero error code on failure. 
+ */ +int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int ret; + + GEM_BUG_ON(!slpc->vma); + + slpc_shared_data_reset(slpc->vaddr); + + ret = slpc_reset(slpc); + if (unlikely(ret < 0)) { + drm_err(&i915->drm, "SLPC Reset event returned (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + ret = slpc_query_task_state(slpc); + if (unlikely(ret < 0)) + return ret; + + return 0; +} + void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) { if (!slpc->vma) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 8bd753167234..3cefe19b17b2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -8,6 +8,8 @@ #include +#define SLPC_RESET_TIMEOUT_MS 5 + struct intel_guc_slpc { struct i915_vma *vma; struct slpc_shared_data *vaddr; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index e6bd9406c7b2..3e0cd1f05e3b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -500,12 +500,21 @@ static int __uc_init_hw(struct intel_uc *uc) if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); + if (intel_uc_uses_guc_slpc(uc)) { + ret = intel_guc_slpc_enable(&guc->slpc); + if (ret) + goto err_submission; + } + drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, guc->fw.major_ver_found, guc->fw.minor_ver_found, "submission", enableddisabled(intel_uc_uses_guc_submission(uc))); + drm_info(&i915->drm, "GuC SLPC: %s\n", + enableddisabled(intel_uc_uses_guc_slpc(uc))); + if (intel_uc_uses_huc(uc)) { drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), @@ -520,6 +529,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* * We've failed to load the firmware :( */ +err_submission: + intel_guc_submission_disable(guc); err_log_capture: __uc_capture_load_err_log(uc); err_out: -- cgit v1.2.3 From db301cffd8a285bbe73879671e6d666a0b654138 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:11 -0700 Subject: drm/i915/guc/slpc: Remove BUG_ON in guc_submission_disable The assumption when it was added was that GT would not be holding any gt_pm references. However, uc_init is called from gt_init_hw, which holds a forcewake ref. If SLPC enable fails, we will still be holding this ref, which will result in the BUG_ON. 
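For reference, the failing sequence looks roughly like this (call names as
used in this series; simplified):

	gt_init_hw()                      <- already holds a forcewake reference
	  __uc_init_hw()
	    intel_guc_slpc_enable()       <- fails
	    intel_guc_submission_disable()
	      GEM_BUG_ON(gt->awake)       <- fires, although the reference is legitimate

The assert therefore cannot tell a real ordering bug from this error path,
so it is removed.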
Reviewed-by: Matthew Brost Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-7-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 89ff0e4b4bc7..87d8dc8f51b9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2536,10 +2536,6 @@ void intel_guc_submission_enable(struct intel_guc *guc) void intel_guc_submission_disable(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - - GEM_BUG_ON(gt->awake); /* GT should be parked first */ - /* Note: By the time we're here, GuC may have already been reset */ } -- cgit v1.2.3 From d41f6f82d319ca0effae840553cac74425328ddf Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:12 -0700 Subject: drm/i915/guc/slpc: Add methods to set min/max frequency Add param set h2g helpers to set the min and max frequencies for use by SLPC. v2: Address review comments (Michal W) v3: Check for positive error code (Michal W) v4: Print generic error in set_param (Michal W) Reviewed-by: Michal Wajdeczko Signed-off-by: Sundaresan Sujaritha Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-8-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 89 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 91 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index f26cf07ec87b..2635feb59576 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -111,6 +111,21 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -148,6 +163,22 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) return ret; } +static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int ret; + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + ret = guc_action_slpc_set_param(guc, id, value); + if (ret) + drm_err(&i915->drm, "Failed to set param %d to %u (%pe)\n", + id, value, ERR_PTR(ret)); + + return ret; +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { switch (state) { @@ -231,6 +262,64 @@ static void slpc_shared_data_reset(struct slpc_shared_data *data) SLPC_PARAM_TASK_DISABLE_DCC); } +/** + * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. 
+ */ +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + val); + + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + } + + return ret; +} + +/** + * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the min unslice + * frequency. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index bc139682ad0f..788d87ff7b58 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -29,5 +29,7 @@ void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc); int intel_guc_slpc_init(struct intel_guc_slpc *slpc); int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); #endif -- cgit v1.2.3 From c279bec18e97735aeb4ab2c931da778bb31f4f02 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:13 -0700 Subject: drm/i915/guc/slpc: Add get max/min freq hooks Add helpers to read the min/max frequency being used by SLPC. This is done by send a H2G command which forces SLPC to update the shared data struct which can then be read. These helpers will be used in a sysfs patch later on. 
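A hedged usage sketch of the new getters (the real consumer is the sysfs
plumbing added later in the series; this only shows the calling convention):

	u32 min_mhz, max_mhz;

	if (!intel_guc_slpc_get_min_freq(slpc, &min_mhz) &&
	    !intel_guc_slpc_get_max_freq(slpc, &max_mhz))
		drm_info(&i915->drm, "SLPC range: %u-%u MHz\n", min_mhz, max_mhz);

Each getter takes a runtime PM wakeref, sends SLPC_EVENT_QUERY_TASK_STATE so
GuC rewrites the shared page, and then decodes the result from
task_state_data.freq.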
v2: Address review comments (Michal W) v3: Return err in case of query failure (Michal W) v4: Move decode_min/max_freq to this patch Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-9-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 76 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 + 2 files changed, 78 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 2635feb59576..8bfd1880b2f5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -245,6 +245,28 @@ static int slpc_reset(struct intel_guc_slpc *slpc) return 0; } +static u32 slpc_decode_min_freq(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + + GEM_BUG_ON(!slpc->vma); + + return DIV_ROUND_CLOSEST(REG_FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK, + data->task_state_data.freq) * + GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); +} + +static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + + GEM_BUG_ON(!slpc->vma); + + return DIV_ROUND_CLOSEST(REG_FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK, + data->task_state_data.freq) * + GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); +} + static void slpc_shared_data_reset(struct slpc_shared_data *data) { memset(data, 0, sizeof(struct slpc_shared_data)); @@ -291,6 +313,33 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) return ret; } +/** + * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold max frequency (MHz) + * + * This function will invoke GuC SLPC action to read the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_max_freq(slpc); + } + + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. @@ -320,6 +369,33 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) return ret; } +/** + * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold min frequency (MHz) + * + * This function will invoke GuC SLPC action to read the min frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_min_freq(slpc); + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 788d87ff7b58..78a7893ce489 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -31,5 +31,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); #endif -- cgit v1.2.3 From f1928ac2a18ffa0784783de09cc6fb5ca17c92f9 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:14 -0700 Subject: drm/i915/guc/slpc: Add debugfs for SLPC info This prints out relevant SLPC info from the SLPC shared structure. We will send a H2G message which forces SLPC to update the shared data structure with latest information before reading it. v2: Address review comments (Michal W) v3: Remove unnecessary tasks from slpc_info (Michal W) v4: Rename function to intel_guc_slpc_print_info() (Michal W) v5: checkpatch() Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Sundaresan Sujaritha Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-10-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 22 +++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 29 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 3 +++ 3 files changed, 54 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 72ddfff42f7d..887c8c8f35db 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -12,6 +12,7 @@ #include "gt/uc/intel_guc_ct.h" #include "gt/uc/intel_guc_ads.h" #include "gt/uc/intel_guc_submission.h" +#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -50,11 +51,32 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data) } DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); +static int guc_slpc_info_show(struct seq_file *m, void *unused) +{ + struct intel_guc *guc = m->private; + struct intel_guc_slpc *slpc = &guc->slpc; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_slpc_is_used(guc)) + return -ENODEV; + + return intel_guc_slpc_print_info(slpc, &p); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); + +static bool intel_eval_slpc_support(void *data) +{ + struct intel_guc *guc = (struct intel_guc *)data; + + return intel_guc_slpc_is_used(guc); +} + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct debugfs_gt_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, + { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 8bfd1880b2f5..4bf7c150330a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -432,6 +432,35 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return 0; } +int 
intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct slpc_shared_data *data = slpc->vaddr; + struct slpc_task_state_data *slpc_tasks; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!slpc->vma); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_query_task_state(slpc); + + if (!ret) { + slpc_tasks = &data->task_state_data; + + drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc)); + drm_printf(p, "\tGTPERF task active: %s\n", + yesno(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tMax freq: %u MHz\n", + slpc_decode_max_freq(slpc)); + drm_printf(p, "\tMin freq: %u MHz\n", + slpc_decode_min_freq(slpc)); + } + } + + return ret; +} + void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) { if (!slpc->vma) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 78a7893ce489..3ffd4f2e3151 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,8 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +struct drm_printer; + static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) { return guc->slpc.supported; @@ -33,5 +35,6 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p); #endif -- cgit v1.2.3 From 899a0fd73a41f3e3babedbc2e5bf73fd38a4461f Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:15 -0700 Subject: drm/i915/guc/slpc: Enable ARAT timer interrupt This interrupt is enabled during RPS initialization, and now needs to be done by SLPC code. It allows ARAT timer expiry interrupts to get forwarded to GuC. v2: Fix comment (Matthew Brost) v3: checkpatch() Reviewed-by: Matthew Brost Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-11-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 8 ++++++++ 3 files changed, 27 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 4bf7c150330a..ebabc84d4e39 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -396,6 +396,21 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) +{ + u32 pm_intrmsk_mbz = 0; + + /* + * Allow GuC to receive ARAT timer expiry event. + * This interrupt register is setup by RPS code + * when host based Turbo is enabled. + */ + pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + + intel_uncore_rmw(gt->uncore, + GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. 
@@ -429,6 +444,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) if (unlikely(ret < 0)) return ret; + intel_guc_pm_intrmsk_enable(&i915->gt); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 3ffd4f2e3151..e45054d5b9b4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,7 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +struct intel_gt; struct drm_printer; static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) @@ -36,5 +37,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p); +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 3e0cd1f05e3b..b104fb7607eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -655,6 +655,7 @@ void intel_uc_suspend(struct intel_uc *uc) static int __uc_resume(struct intel_uc *uc, bool enable_communication) { struct intel_guc *guc = &uc->guc; + struct intel_gt *gt = guc_to_gt(guc); int err; if (!intel_guc_is_fw_running(guc)) @@ -666,6 +667,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) if (enable_communication) guc_enable_communication(guc); + /* If we are only resuming GuC communication but not reloading + * GuC, we need to ensure the ARAT timer interrupt is enabled + * again. In case of GuC reload, it is enabled during SLPC enable. + */ + if (enable_communication && intel_uc_uses_guc_slpc(uc)) + intel_guc_pm_intrmsk_enable(gt); + err = intel_guc_resume(guc); if (err) { DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); -- cgit v1.2.3 From 025cb07bebfaf9e3703f902cce92b4656129a62b Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:16 -0700 Subject: drm/i915/guc/slpc: Cache platform frequency limits Cache rp0, rp1 and rpn platform limits into SLPC structure for range checking while setting min/max frequencies. Also add "soft" limits which keep track of frequency changes made from userland. These are initially set to platform min and max. 
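To make the interaction of the cached limits concrete, a small illustration
with made-up fuse values (say RPn/min_freq = 300 MHz and RP0/rp0_freq =
1100 MHz, so the softlimits start out as 300-1100):

	intel_guc_slpc_set_max_freq(slpc, 800); /* ok: max softlimit becomes 800 */
	intel_guc_slpc_set_min_freq(slpc, 900); /* -EINVAL: above the max softlimit */
	intel_guc_slpc_set_min_freq(slpc, 200); /* -EINVAL: below platform RPn */
	intel_guc_slpc_set_min_freq(slpc, 600); /* ok: H2G set-param, min softlimit = 600 */

The next time intel_guc_slpc_enable() runs (for example after a GuC reset),
slpc_set_softlimits() re-applies the 600-800 window instead of the fused
defaults, so userland settings are preserved.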
v2: Address review comments (Michal W) v3: Formatting (Michal W) v4: Add separate function to parse rp values (Michal W) v5: Perform range checking for set min/max (Michal W) v6: checkpatch() and rename static functions (Michal W) v7: check ret code while setting SLPC limits (Michal W) Signed-off-by: Vinay Belgaumkar Reviewed-by: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-12-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 139 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 9 ++ drivers/gpu/drm/i915/i915_reg.h | 3 + 3 files changed, 151 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ebabc84d4e39..65a3e7fdb2b2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -96,6 +96,9 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) return err; } + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + return err; } @@ -126,6 +129,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -179,6 +193,16 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } +static int slpc_unset_param(struct intel_guc_slpc *slpc, + u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + static const char *slpc_global_state_to_string(enum slpc_global_state state) { switch (state) { @@ -300,6 +324,11 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) intel_wakeref_t wakeref; int ret; + if (val < slpc->min_freq || + val > slpc->rp0_freq || + val < slpc->min_freq_softlimit) + return -EINVAL; + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, @@ -310,6 +339,9 @@ int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) ret = -EIO; } + if (!ret) + slpc->max_freq_softlimit = val; + return ret; } @@ -356,6 +388,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) intel_wakeref_t wakeref; int ret; + if (val < slpc->min_freq || + val > slpc->rp0_freq || + val > slpc->max_freq_softlimit) + return -EINVAL; + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, @@ -366,6 +403,9 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) ret = -EIO; } + if (!ret) + slpc->min_freq_softlimit = val; + return ret; } @@ -411,6 +451,79 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); } +static int slpc_set_softlimits(struct intel_guc_slpc *slpc) +{ + int ret = 0; + + /* + * Softlimits are initially equivalent to platform limits + * unless they have deviated from defaults, in which case, + * we retain the values and set min/max accordingly. 
+ */ + if (!slpc->max_freq_softlimit) + slpc->max_freq_softlimit = slpc->rp0_freq; + else if (slpc->max_freq_softlimit != slpc->rp0_freq) + ret = intel_guc_slpc_set_max_freq(slpc, + slpc->max_freq_softlimit); + + if (unlikely(ret)) + return ret; + + if (!slpc->min_freq_softlimit) + slpc->min_freq_softlimit = slpc->min_freq; + else if (slpc->min_freq_softlimit != slpc->min_freq) + return intel_guc_slpc_set_min_freq(slpc, + slpc->min_freq_softlimit); + + return 0; +} + +static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) +{ + int ret = 0; + + if (ignore) { + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + ignore); + if (!ret) + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } else { + ret = slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); + if (!ret) + return slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); + } + + return ret; +} + +static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) +{ + /* Force SLPC to used platform rp0 */ + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + slpc->rp0_freq); +} + +static void slpc_get_rp_values(struct intel_guc_slpc *slpc) +{ + u32 rp_state_cap; + + rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore, + GEN6_RP_STATE_CAP); + + slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; + slpc->rp1_freq = REG_FIELD_GET(RP1_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; + slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. @@ -446,6 +559,32 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) intel_guc_pm_intrmsk_enable(&i915->gt); + slpc_get_rp_values(slpc); + + /* Ignore efficient freq and set min to platform min */ + ret = slpc_ignore_eff_freq(slpc, true); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC min to RPn (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + /* Set SLPC max limit to RP0 */ + ret = slpc_use_fused_rp0(slpc); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC max to RP0 (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + /* Revert SLPC min/max to softlimits if necessary */ + ret = slpc_set_softlimits(slpc); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC softlimits (%pe)\n", + ERR_PTR(ret)); + return ret; + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 3cefe19b17b2..41d13527666f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -15,6 +15,15 @@ struct intel_guc_slpc { struct slpc_shared_data *vaddr; bool supported; bool selected; + + /* platform frequency limits */ + u32 min_freq; + u32 rp0_freq; + u32 rp1_freq; + + /* frequency softlimits */ + u32 min_freq_softlimit; + u32 max_freq_softlimit; }; #endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5603377e06ca..f3a445f79a36 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4109,6 +4109,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BXT_GT_PERF_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070) #define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994) #define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP0_CAP_MASK REG_GENMASK(7, 0) +#define 
RP1_CAP_MASK REG_GENMASK(15, 8) +#define RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) -- cgit v1.2.3 From 41e5c17ebfc20bd840993e9c36e5993298e3fe6d Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:17 -0700 Subject: drm/i915/guc/slpc: Sysfs hooks for SLPC Update the get/set min/max freq hooks to work for SLPC case as well. Consolidate helpers for requested/min/max frequency get/set to intel_rps where the proper action can be taken depending on whether SLPC is enabled. v2: Add wrappers for getting rp0/1/n frequencies, update softlimits in set min/max SLPC functions. Also check for boundary conditions before setting them. v3: Address review comments (Michal W) v4: Add helper for host part of intel_rps_set_freq helpers (Michal W) v5: checkpatch() Reviewed-by: Michal Wajdeczko Acked-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: Tvrtko Ursulin Signed-off-by: Sujaritha Sundaresan Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-13-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 177 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rps.h | 10 ++ drivers/gpu/drm/i915/i915_pmu.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/i915_sysfs.c | 83 ++++------------- 5 files changed, 207 insertions(+), 67 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index cdfce5b8d861..30acfae125aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,13 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return >->uc.guc.slpc; +} + static bool rps_uses_slpc(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); @@ -1963,6 +1970,176 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +u32 intel_rps_read_punit_req(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + return intel_uncore_read(uncore, GEN6_RPNSWREQ); +} + +static u32 intel_rps_get_req(u32 pureq) +{ + u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; + + return req; +} + +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) +{ + u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps)); + + return intel_gpu_freq(rps, freq); +} + +u32 intel_rps_get_requested_frequency(struct intel_rps *rps) +{ + if (rps_uses_slpc(rps)) + return intel_rps_read_punit_req_frequency(rps); + else + return intel_gpu_freq(rps, rps->cur_freq); +} + +u32 intel_rps_get_max_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->max_freq_softlimit; + else + return intel_gpu_freq(rps, rps->max_freq_softlimit); +} + +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp0_freq; + else + return intel_gpu_freq(rps, rps->rp0_freq); +} + +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp1_freq; + else + return intel_gpu_freq(rps, rps->rp1_freq); +} + +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc 
*slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq; + else + return intel_gpu_freq(rps, rps->min_freq); +} + +static int set_max_freq(struct intel_rps *rps, u32 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int ret = 0; + + mutex_lock(&rps->lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + if (val > rps->rp0_freq) + drm_dbg(&i915->drm, "User requested overclocking to %d\n", + intel_gpu_freq(rps, val)); + + rps->max_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* + * We still need *_set_rps to process the new max_delay and + * update the interrupt limits and PMINTRMSK even though + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_max_freq(slpc, val); + else + return set_max_freq(rps, val); +} + +u32 intel_rps_get_min_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq_softlimit; + else + return intel_gpu_freq(rps, rps->min_freq_softlimit); +} + +static int set_min_freq(struct intel_rps *rps, u32 val) +{ + int ret = 0; + + mutex_lock(&rps->lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + rps->min_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* + * We still need *_set_rps to process the new min_delay and + * update the interrupt limits and PMINTRMSK even though + * frequency request may be unchanged. 
+ */ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_min_freq(slpc, val); + else + return set_min_freq(rps, val); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 1d2cfc98b510..4213bcce1667 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -31,6 +31,16 @@ int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_get_requested_frequency(struct intel_rps *rps); +u32 intel_rps_get_min_frequency(struct intel_rps *rps); +int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val); +u32 intel_rps_get_max_frequency(struct intel_rps *rps); +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); +u32 intel_rps_read_punit_req(struct intel_rps *rps); +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index eca92076f31d..0b488d49694c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -407,7 +407,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(rps, rps->cur_freq), + intel_rps_get_requested_frequency(rps), period_ns / 1000); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f3a445f79a36..2416a5e63f2a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9229,6 +9229,8 @@ enum { #define GEN9_FREQUENCY(x) ((x) << 23) #define GEN6_OFFSET(x) ((x) << 19) #define GEN6_AGGRESSIVE_TURBO (0 << 15) +#define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23 + #define GEN6_RC_VIDEO_FREQ _MMIO(0xA00C) #define GEN6_RC_CONTROL _MMIO(0xA090) #define GEN6_RC_CTL_RC6pp_ENABLE (1 << 16) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 873bf996ceb5..cdf0e9c6fd73 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -272,7 +272,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->cur_freq)); + return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -326,9 +326,10 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_gt *gt = &dev_priv->gt; + struct 
intel_rps *rps = >->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->max_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -336,7 +337,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = >->rps; ssize_t ret; u32 val; @@ -344,53 +346,26 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - mutex_lock(&rps->lock); - - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || - val > rps->max_freq || - val < rps->min_freq_softlimit) { - ret = -EINVAL; - goto unlock; - } - - if (val > rps->rp0_freq) - DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(rps, val)); - - rps->max_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* - * We still need *_set_rps to process the new max_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. - */ - intel_rps_set(rps, val); - -unlock: - mutex_unlock(&rps->lock); + ret = intel_rps_set_max_frequency(rps, val); return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_gt *gt = &i915->gt; + struct intel_rps *rps = >->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->min_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, struct device_attribute *attr, const char *buf, size_t count) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &i915->gt.rps; ssize_t ret; u32 val; @@ -398,31 +373,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - mutex_lock(&rps->lock); - - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || - val > rps->max_freq || - val > rps->max_freq_softlimit) { - ret = -EINVAL; - goto unlock; - } - - rps->min_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* - * We still need *_set_rps to process the new min_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. 
- */ - intel_rps_set(rps, val); - -unlock: - mutex_unlock(&rps->lock); + ret = intel_rps_set_min_frequency(rps, val); return ret ?: count; } @@ -448,11 +399,11 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(rps, rps->rp0_freq); + val = intel_rps_get_rp0_frequency(rps); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(rps, rps->rp1_freq); + val = intel_rps_get_rp1_frequency(rps); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(rps, rps->min_freq); + val = intel_rps_get_rpn_frequency(rps); else BUG(); -- cgit v1.2.3 From 8ee2c227822e755d0dd4375337e626c9c79c6363 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:18 -0700 Subject: drm/i915/guc/slpc: Add SLPC selftest Tests that exercise the SLPC get/set frequency interfaces. Clamp_max will set max frequency to multiple levels and check that SLPC requests frequency lower than or equal to it. Clamp_min will set min frequency to different levels and check if SLPC requests are higher or equal to those levels. v2: Address review comments (Michal W) v3: Checkpatch() corrections v4: Remove unnecessary header file (Matthew Brost) v5: checkpatch() and define const for 50/3 (Matthew Brost) Reviewed-by: Matthew Brost Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-14-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 1 + drivers/gpu/drm/i915/gt/selftest_slpc.c | 311 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 313 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/selftest_slpc.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 30acfae125aa..d812b27835f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2333,4 +2333,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_rps.c" +#include "selftest_slpc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c new file mode 100644 index 000000000000..9334bad131a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#define NUM_STEPS 5 +#define H2G_DELAY 50000 +#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) +#define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ + GEN9_FREQ_SCALER) + +static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_min_freq(slpc, freq); + if (ret) + pr_err("Could not set min frequency to [%u]\n", freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_max_freq(slpc, freq); + if (ret) + pr_err("Could not set maximum frequency [%u]\n", + freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int live_slpc_clamp_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner 
spin; + u32 slpc_min_freq, slpc_max_freq; + int err = 0; + + if (!intel_uc_uses_guc_slpc(>->uc)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 step, min_freq, req_freq; + u32 act_freq, max_act_freq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from min to max in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + /* Wait for GuC to detect business and raise + * requested frequency if necessary. + */ + delay_for_h2g(); + + req_freq = intel_rps_read_punit_req_frequency(rps); + + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at least %d\n", req_freq, + min_freq - FREQUENCY_REQ_UNIT); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + break; + } + + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > max_act_freq) + max_act_freq = act_freq; + + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + } + + pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); + + /* Actual frequency should rise above min */ + if (max_act_freq == slpc_min_freq) { + pr_err("Actual freq did not rise above min\n"); + err = -EINVAL; + } + + if (err) + break; + } + + /* Restore min/max frequencies */ + slpc_set_max_freq(slpc, slpc_max_freq); + slpc_set_min_freq(slpc, slpc_min_freq); + + if (igt_flush_test(gt->i915)) + err = -EIO; + + intel_gt_pm_put(gt); + igt_spinner_fini(&spin); + intel_gt_pm_wait_for_idle(gt); + + return err; +} + +static int live_slpc_clamp_max(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; + struct intel_guc_slpc *slpc; + struct intel_rps *rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + u32 slpc_min_freq, slpc_max_freq; + + slpc = >->uc.guc.slpc; + rps = >->rps; + + if (!intel_uc_uses_guc_slpc(>->uc)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + 
intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 max_freq, req_freq; + u32 act_freq, max_act_freq; + u32 step; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from max to min in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; + max_freq -= step) { + err = slpc_set_max_freq(slpc, max_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + st_engine_heartbeat_enable(engine); + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: SLPC spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + delay_for_h2g(); + + /* Verify that SWREQ indeed was set to specific value */ + req_freq = intel_rps_read_punit_req_frequency(rps); + + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at most %d\n", req_freq, + max_freq + FREQUENCY_REQ_UNIT); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + break; + } + + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > max_act_freq) + max_act_freq = act_freq; + + st_engine_heartbeat_enable(engine); + igt_spinner_end(&spin); + + if (err) + break; + } + + pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); + + /* Actual frequency should rise above min */ + if (max_act_freq == slpc_min_freq) { + pr_err("Actual freq did not rise above min\n"); + err = -EINVAL; + } + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + + if (err) + break; + } + + /* Restore min/max freq */ + slpc_set_max_freq(slpc, slpc_max_freq); + slpc_set_min_freq(slpc, slpc_min_freq); + + intel_gt_pm_put(gt); + igt_spinner_fini(&spin); + intel_gt_pm_wait_for_idle(gt); + + return err; +} + +int intel_slpc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_slpc_clamp_max), + SUBTEST(live_slpc_clamp_min), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index e2fd1b61af71..1746a56dda06 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -47,5 +47,6 @@ selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) +selftest(slpc, intel_slpc_live_selftests) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests) -- cgit v1.2.3 From 216d56c5da5c93cb2ae91b911ad6ccfc33e03bee Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 30 Jul 2021 13:21:19 -0700 Subject: drm/i915/guc/rc: Setup and enable GuCRC feature This feature hands over the control of HW RC6 to the GuC. GuC decides when to put HW into RC6 based on it's internal busyness algorithms. GuCRC needs GuC submission to be enabled, and only supported on Gen12+ for now. When GuCRC is enabled, do not set HW RC6. 
Use a H2G message to tell GuC to enable GuCRC. When disabling RC6, tell GuC to revert RC6 control back to KMD. KMD is still responsible for enabling everything related to Coarse Power Gating though. v2: Address comments (Michal W) v3: Don't set hysterisis values when GuCRC is used (Matt Roper) v4: checkpatch() Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730202119.23810-15-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_rc6.c | 47 +++++++++----- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 80 ++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h | 31 +++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 + 8 files changed, 155 insertions(+), 15 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 134b0edf4a2e..949719ba048e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -186,6 +186,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_rc.o \ gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index a7d13fe35b2e..799d382eea79 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -62,20 +62,25 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) u32 pg_enable; int i; - /* 2b: Program RC6 thresholds.*/ - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + /* + * With GuCRC, these parameters are set by GuC + */ + if (!intel_uc_uses_guc_rc(>->uc)) { + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); - set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC_SLEEP, 0); - set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + } /* * 2c: Program Coarse Power Gating Policies. @@ -98,11 +103,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); - /* 3a: Enable RC6 */ - rc6->ctl_enable = - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1); + /* 3a: Enable RC6 + * + * With GuCRC, we do not enable bit 31 of RC_CTL, + * thus allowing GuC to control RC6 entry/exit fully instead. 
+ * We will not set the HW ENABLE and EI bits + */ + if (!intel_guc_rc_enable(>->uc.guc)) + rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; + else + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); pg_enable = GEN9_RENDER_PG_ENABLE | @@ -513,6 +526,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + + /* Take control of RC6 back from GuC */ + intel_guc_rc_disable(>->uc.guc); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index ca538e5de940..8ff582222aff 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -135,6 +135,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, @@ -145,6 +146,11 @@ enum intel_guc_action { INTEL_GUC_ACTION_LIMIT }; +enum intel_guc_rc_options { + INTEL_GUCRC_HOST_CONTROL, + INTEL_GUCRC_FIRMWARE_CONTROL, +}; + enum intel_guc_preempt_options { INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 13d162353b1a..fbfcae727d7f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -159,6 +159,7 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); intel_guc_slpc_init_early(&guc->slpc); + intel_guc_rc_init_early(guc); mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 7da11a0b6059..2e27fe59786b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -59,6 +59,8 @@ struct intel_guc { bool submission_supported; bool submission_selected; + bool rc_supported; + bool rc_selected; struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c new file mode 100644 index 000000000000..fc805d466d99 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "intel_guc_rc.h" +#include "gt/intel_gt.h" +#include "i915_drv.h" + +static bool __guc_rc_supported(struct intel_guc *guc) +{ + /* GuC RC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_rc_selected(struct intel_guc *guc) +{ + if (!intel_guc_rc_is_supported(guc)) + return false; + + return guc->submission_selected; +} + +void intel_guc_rc_init_early(struct intel_guc *guc) +{ + guc->rc_supported = __guc_rc_supported(guc); + guc->rc_selected = __guc_rc_selected(guc); +} + +static int guc_action_control_gucrc(struct intel_guc *guc, bool enable) +{ + u32 rc_mode = enable ? 
INTEL_GUCRC_FIRMWARE_CONTROL : + INTEL_GUCRC_HOST_CONTROL; + u32 action[] = { + INTEL_GUC_ACTION_SETUP_PC_GUCRC, + rc_mode + }; + int ret; + + ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); + ret = ret > 0 ? -EPROTO : ret; + + return ret; +} + +static int __guc_rc_control(struct intel_guc *guc, bool enable) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_device *drm = &guc_to_gt(guc)->i915->drm; + int ret; + + if (!intel_uc_uses_guc_rc(>->uc)) + return -EOPNOTSUPP; + + if (!intel_guc_is_ready(guc)) + return -EINVAL; + + ret = guc_action_control_gucrc(guc, enable); + if (ret) { + drm_err(drm, "Failed to %s GuC RC (%pe)\n", + enabledisable(enable), ERR_PTR(ret)); + return ret; + } + + drm_info(>->i915->drm, "GuC RC: %s\n", + enableddisabled(enable)); + + return 0; +} + +int intel_guc_rc_enable(struct intel_guc *guc) +{ + return __guc_rc_control(guc, true); +} + +int intel_guc_rc_disable(struct intel_guc *guc) +{ + return __guc_rc_control(guc, false); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h new file mode 100644 index 000000000000..57e86c337838 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_RC_H_ +#define _INTEL_GUC_RC_H_ + +#include "intel_guc_submission.h" + +void intel_guc_rc_init_early(struct intel_guc *guc); + +static inline bool intel_guc_rc_is_supported(struct intel_guc *guc) +{ + return guc->rc_supported; +} + +static inline bool intel_guc_rc_is_wanted(struct intel_guc *guc) +{ + return guc->submission_selected && intel_guc_rc_is_supported(guc); +} + +static inline bool intel_guc_rc_is_used(struct intel_guc *guc) +{ + return intel_guc_submission_is_used(guc) && intel_guc_rc_is_wanted(guc); +} + +int intel_guc_rc_enable(struct intel_guc *guc); +int intel_guc_rc_disable(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 925a58ca6b94..866b462821c0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -7,6 +7,7 @@ #define _INTEL_UC_H_ #include "intel_guc.h" +#include "intel_guc_rc.h" #include "intel_guc_submission.h" #include "intel_guc_slpc.h" #include "intel_huc.h" @@ -85,6 +86,7 @@ uc_state_checkers(guc, guc); uc_state_checkers(huc, huc); uc_state_checkers(guc, guc_submission); uc_state_checkers(guc, guc_slpc); +uc_state_checkers(guc, guc_rc); #undef uc_state_checkers #undef __uc_state_checker -- cgit v1.2.3 From e05316366040429ccce7068de17066b1cbf944bb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 09:59:53 -0700 Subject: drm/i915/dg2: Add forcewake table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DG2 forcewake table is very similar to the one used by XeHP SDV (and both platforms are even presented as a single table in the bspec). For the most part DG2 starts using a few additional ranges that were 'reserved' on XeHP SDV and stops using some others. However there is a single range (0xd800-0xd87f) that needs to be handled differently between the two platforms (it needs GT wake on XeHP SDV, but render wake on DG2) so unless we want to wake both domains (which could waste power) or define new types of forcewake domains for this special case we need to have separate tables for the two platforms. 
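One way to avoid duplicating all the shared entries is to generate both tables from a macro parameterized on the domain of that single range, for example (a simplified illustration, not the actual intel_uncore.c tables):

/* Simplified illustration only; offsets and domains are examples */
enum fw_domain { FW_GT, FW_RENDER };

struct fw_range {
	unsigned int start, end;
	enum fw_domain domain;
};

/* Everything is shared except the domain of the 0xd800 - 0xd87f entry */
#define COMMON_FW_RANGES(D800_DOMAIN) \
	{ 0x0000, 0xd7ff, FW_GT },        \
	{ 0xd800, 0xd87f, D800_DOMAIN },  \
	{ 0xd880, 0xffff, FW_GT },

static const struct fw_range xehpsdv_ranges[] = { COMMON_FW_RANGES(FW_GT) };
static const struct fw_range dg2_ranges[]     = { COMMON_FW_RANGES(FW_RENDER) };
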
Let's define the ranges for both platforms with a parameterized macro so that we don't actually need to duplicate everything in the code. It should be fine for DG2 to re-use the Xe_HP shadow register list so we can continue to use the 'xehpsdv' MMIO write functions and don't need to spin up a separate DG2 instance. Bspec: 66534 Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 305 ++++++++++++++++++++---------------- 1 file changed, 168 insertions(+), 137 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 676b0052f01e..0c35acfcd6da 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1317,143 +1317,170 @@ static const struct intel_forcewake_range __gen12_fw_ranges[] = { 0x1d3f00 - 0x1d3fff: VD2 */ }; -/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ -static const struct intel_forcewake_range __xehp_fw_ranges[] = { - GEN_FW_RANGE(0x0, 0x1fff, 0), /* - 0x0 - 0xaff: reserved - 0xb00 - 0x1fff: always on */ - GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x2700, 0x4aff, FORCEWAKE_GT), - GEN_FW_RANGE(0x4b00, 0x51ff, 0), /* - 0x4b00 - 0x4fff: reserved - 0x5000 - 0x51ff: always on */ - GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT), - GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x8160, 0x81ff, 0), /* - 0x8160 - 0x817f: reserved - 0x8180 - 0x81ff: always on */ - GEN_FW_RANGE(0x8200, 0x82ff, FORCEWAKE_GT), - GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x8500, 0x94cf, FORCEWAKE_GT), /* - 0x8500 - 0x87ff: gt - 0x8800 - 0x8fff: reserved - 0x9000 - 0x947f: gt - 0x9480 - 0x94cf: reserved */ - GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x9560, 0x97ff, 0), /* - 0x9560 - 0x95ff: always on - 0x9600 - 0x97ff: reserved */ - GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* - 0x9800 - 0xb4ff: gt - 0xb500 - 0xbfff: reserved - 0xc000 - 0xcfff: gt */ - GEN_FW_RANGE(0xd000, 0xd7ff, 0), - GEN_FW_RANGE(0xd800, 0xdbff, FORCEWAKE_GT), - GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* - 0xdd00 - 0xddff: gt - 0xde00 - 0xde7f: reserved */ - GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* - 0xde80 - 0xdfff: render - 0xe000 - 0xe0ff: reserved - 0xe100 - 0xe8ff: render */ - GEN_FW_RANGE(0xe900, 0xffff, FORCEWAKE_GT), /* - 0xe900 - 0xe9ff: gt - 0xea00 - 0xefff: reserved - 0xf000 - 0xffff: gt */ - GEN_FW_RANGE(0x10000, 0x13fff, 0), /* - 0x10000 - 0x11fff: reserved - 0x12000 - 0x127ff: always on - 0x12800 - 0x13fff: reserved */ - GEN_FW_RANGE(0x14000, 0x141ff, FORCEWAKE_MEDIA_VDBOX0), - GEN_FW_RANGE(0x14200, 0x143ff, FORCEWAKE_MEDIA_VDBOX2), - GEN_FW_RANGE(0x14400, 0x145ff, FORCEWAKE_MEDIA_VDBOX4), - GEN_FW_RANGE(0x14600, 0x147ff, FORCEWAKE_MEDIA_VDBOX6), - GEN_FW_RANGE(0x14800, 0x1ffff, FORCEWAKE_RENDER), /* - 0x14800 - 0x14fff: render - 0x15000 - 0x16dff: reserved - 0x16e00 - 0x1ffff: render */ - GEN_FW_RANGE(0x20000, 0x21fff, FORCEWAKE_MEDIA_VDBOX0), /* - 0x20000 - 0x20fff: VD0 - 0x21000 - 0x21fff: reserved */ - GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), - GEN_FW_RANGE(0x24000, 0x2417f, 0), /* - 0x24000 - 0x2407f: always on - 0x24080 - 0x2417f: reserved */ - GEN_FW_RANGE(0x24180, 0x249ff, FORCEWAKE_GT), /* - 0x24180 - 0x241ff: gt - 
0x24200 - 0x249ff: reserved */ - GEN_FW_RANGE(0x24a00, 0x251ff, FORCEWAKE_RENDER), /* - 0x24a00 - 0x24a7f: render - 0x24a80 - 0x251ff: reserved */ - GEN_FW_RANGE(0x25200, 0x25fff, FORCEWAKE_GT), /* - 0x25200 - 0x252ff: gt - 0x25300 - 0x25fff: reserved */ - GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /* - 0x26000 - 0x27fff: render - 0x28000 - 0x29fff: reserved - 0x2a000 - 0x2ffff: undocumented */ - GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), - GEN_FW_RANGE(0x40000, 0x1bffff, 0), - GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* - 0x1c0000 - 0x1c2bff: VD0 - 0x1c2c00 - 0x1c2cff: reserved - 0x1c2d00 - 0x1c2dff: VD0 - 0x1c2e00 - 0x1c3eff: reserved - 0x1c3f00 - 0x1c3fff: VD0 */ - GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), /* - 0x1c4000 - 0x1c6bff: VD1 - 0x1c6c00 - 0x1c6cff: reserved - 0x1c6d00 - 0x1c6dff: VD1 - 0x1c6e00 - 0x1c7fff: reserved */ - GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /* - 0x1c8000 - 0x1ca0ff: VE0 - 0x1ca100 - 0x1cbfff: reserved */ - GEN_FW_RANGE(0x1cc000, 0x1ccfff, FORCEWAKE_MEDIA_VDBOX0), - GEN_FW_RANGE(0x1cd000, 0x1cdfff, FORCEWAKE_MEDIA_VDBOX2), - GEN_FW_RANGE(0x1ce000, 0x1cefff, FORCEWAKE_MEDIA_VDBOX4), - GEN_FW_RANGE(0x1cf000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX6), - GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), /* - 0x1d0000 - 0x1d2bff: VD2 - 0x1d2c00 - 0x1d2cff: reserved - 0x1d2d00 - 0x1d2dff: VD2 - 0x1d2e00 - 0x1d3eff: reserved - 0x1d3f00 - 0x1d3fff: VD2 */ - GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), /* - 0x1d4000 - 0x1d6bff: VD3 - 0x1d6c00 - 0x1d6cff: reserved - 0x1d6d00 - 0x1d6dff: VD3 - 0x1d6e00 - 0x1d7fff: reserved */ - GEN_FW_RANGE(0x1d8000, 0x1dffff, FORCEWAKE_MEDIA_VEBOX1), /* - 0x1d8000 - 0x1da0ff: VE1 - 0x1da100 - 0x1dffff: reserved */ - GEN_FW_RANGE(0x1e0000, 0x1e3fff, FORCEWAKE_MEDIA_VDBOX4), /* - 0x1e0000 - 0x1e2bff: VD4 - 0x1e2c00 - 0x1e2cff: reserved - 0x1e2d00 - 0x1e2dff: VD4 - 0x1e2e00 - 0x1e3eff: reserved - 0x1e3f00 - 0x1e3fff: VD4 */ - GEN_FW_RANGE(0x1e4000, 0x1e7fff, FORCEWAKE_MEDIA_VDBOX5), /* - 0x1e4000 - 0x1e6bff: VD5 - 0x1e6c00 - 0x1e6cff: reserved - 0x1e6d00 - 0x1e6dff: VD5 - 0x1e6e00 - 0x1e7fff: reserved */ - GEN_FW_RANGE(0x1e8000, 0x1effff, FORCEWAKE_MEDIA_VEBOX2), /* - 0x1e8000 - 0x1ea0ff: VE2 - 0x1ea100 - 0x1effff: reserved */ - GEN_FW_RANGE(0x1f0000, 0x1f3fff, FORCEWAKE_MEDIA_VDBOX6), /* - 0x1f0000 - 0x1f2bff: VD6 - 0x1f2c00 - 0x1f2cff: reserved - 0x1f2d00 - 0x1f2dff: VD6 - 0x1f2e00 - 0x1f3eff: reserved - 0x1f3f00 - 0x1f3fff: VD6 */ - GEN_FW_RANGE(0x1f4000, 0x1f7fff, FORCEWAKE_MEDIA_VDBOX7), /* - 0x1f4000 - 0x1f6bff: VD7 - 0x1f6c00 - 0x1f6cff: reserved - 0x1f6d00 - 0x1f6dff: VD7 - 0x1f6e00 - 0x1f7fff: reserved */ +/* + * Graphics IP version 12.55 brings a slight change to the 0xd800 range, + * switching it from the GT domain to the render domain. + * + * *Must* be sorted by offset ranges! See intel_fw_table_check(). 
+ */ +#define XEHP_FWRANGES(FW_RANGE_D800) \ + GEN_FW_RANGE(0x0, 0x1fff, 0), /* \ + 0x0 - 0xaff: reserved \ + 0xb00 - 0x1fff: always on */ \ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x2700, 0x4aff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x4b00, 0x51ff, 0), /* \ + 0x4b00 - 0x4fff: reserved \ + 0x5000 - 0x51ff: always on */ \ + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8160, 0x81ff, 0), /* \ + 0x8160 - 0x817f: reserved \ + 0x8180 - 0x81ff: always on */ \ + GEN_FW_RANGE(0x8200, 0x82ff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8500, 0x8cff, FORCEWAKE_GT), /* \ + 0x8500 - 0x87ff: gt \ + 0x8800 - 0x8c7f: reserved \ + 0x8c80 - 0x8cff: gt (DG2 only) */ \ + GEN_FW_RANGE(0x8d00, 0x8fff, FORCEWAKE_RENDER), /* \ + 0x8d00 - 0x8dff: render (DG2 only) \ + 0x8e00 - 0x8fff: reserved */ \ + GEN_FW_RANGE(0x9000, 0x94cf, FORCEWAKE_GT), /* \ + 0x9000 - 0x947f: gt \ + 0x9480 - 0x94cf: reserved */ \ + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x9560, 0x967f, 0), /* \ + 0x9560 - 0x95ff: always on \ + 0x9600 - 0x967f: reserved */ \ + GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /* \ + 0x9680 - 0x96ff: render (DG2 only) \ + 0x9700 - 0x97ff: reserved */ \ + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* \ + 0x9800 - 0xb4ff: gt \ + 0xb500 - 0xbfff: reserved \ + 0xc000 - 0xcfff: gt */ \ + GEN_FW_RANGE(0xd000, 0xd7ff, 0), \ + GEN_FW_RANGE(0xd800, 0xd87f, FW_RANGE_D800), \ + GEN_FW_RANGE(0xd880, 0xdbff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* \ + 0xdd00 - 0xddff: gt \ + 0xde00 - 0xde7f: reserved */ \ + GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* \ + 0xde80 - 0xdfff: render \ + 0xe000 - 0xe0ff: reserved \ + 0xe100 - 0xe8ff: render */ \ + GEN_FW_RANGE(0xe900, 0xffff, FORCEWAKE_GT), /* \ + 0xe900 - 0xe9ff: gt \ + 0xea00 - 0xefff: reserved \ + 0xf000 - 0xffff: gt */ \ + GEN_FW_RANGE(0x10000, 0x12fff, 0), /* \ + 0x10000 - 0x11fff: reserved \ + 0x12000 - 0x127ff: always on \ + 0x12800 - 0x12fff: reserved */ \ + GEN_FW_RANGE(0x13000, 0x131ff, FORCEWAKE_MEDIA_VDBOX0), /* DG2 only */ \ + GEN_FW_RANGE(0x13200, 0x13fff, FORCEWAKE_MEDIA_VDBOX2), /* \ + 0x13200 - 0x133ff: VD2 (DG2 only) \ + 0x13400 - 0x13fff: reserved */ \ + GEN_FW_RANGE(0x14000, 0x141ff, FORCEWAKE_MEDIA_VDBOX0), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14200, 0x143ff, FORCEWAKE_MEDIA_VDBOX2), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14400, 0x145ff, FORCEWAKE_MEDIA_VDBOX4), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14600, 0x147ff, FORCEWAKE_MEDIA_VDBOX6), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14800, 0x14fff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x15000, 0x16dff, FORCEWAKE_GT), /* \ + 0x15000 - 0x15fff: gt (DG2 only) \ + 0x16000 - 0x16dff: reserved */ \ + GEN_FW_RANGE(0x16e00, 0x1ffff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x20000, 0x21fff, FORCEWAKE_MEDIA_VDBOX0), /* \ + 0x20000 - 0x20fff: VD0 (XEHPSDV only) \ + 0x21000 - 0x21fff: reserved */ \ + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x24000, 0x2417f, 0), /* \ + 0x24000 - 0x2407f: always on \ + 0x24080 - 0x2417f: reserved */ \ + GEN_FW_RANGE(0x24180, 0x249ff, FORCEWAKE_GT), /* \ + 0x24180 - 0x241ff: gt \ + 0x24200 - 0x249ff: reserved */ \ + GEN_FW_RANGE(0x24a00, 0x251ff, FORCEWAKE_RENDER), /* \ + 0x24a00 - 0x24a7f: render \ + 0x24a80 - 0x251ff: reserved */ \ + GEN_FW_RANGE(0x25200, 0x25fff, FORCEWAKE_GT), /* \ 
+ 0x25200 - 0x252ff: gt \ + 0x25300 - 0x25fff: reserved */ \ + GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /* \ + 0x26000 - 0x27fff: render \ + 0x28000 - 0x29fff: reserved \ + 0x2a000 - 0x2ffff: undocumented */ \ + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x40000, 0x1bffff, 0), \ + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* \ + 0x1c0000 - 0x1c2bff: VD0 \ + 0x1c2c00 - 0x1c2cff: reserved \ + 0x1c2d00 - 0x1c2dff: VD0 \ + 0x1c2e00 - 0x1c3eff: VD0 (DG2 only) \ + 0x1c3f00 - 0x1c3fff: VD0 */ \ + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), /* \ + 0x1c4000 - 0x1c6bff: VD1 \ + 0x1c6c00 - 0x1c6cff: reserved \ + 0x1c6d00 - 0x1c6dff: VD1 \ + 0x1c6e00 - 0x1c7fff: reserved */ \ + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /* \ + 0x1c8000 - 0x1ca0ff: VE0 \ + 0x1ca100 - 0x1cbfff: reserved */ \ + GEN_FW_RANGE(0x1cc000, 0x1ccfff, FORCEWAKE_MEDIA_VDBOX0), \ + GEN_FW_RANGE(0x1cd000, 0x1cdfff, FORCEWAKE_MEDIA_VDBOX2), \ + GEN_FW_RANGE(0x1ce000, 0x1cefff, FORCEWAKE_MEDIA_VDBOX4), \ + GEN_FW_RANGE(0x1cf000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX6), \ + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), /* \ + 0x1d0000 - 0x1d2bff: VD2 \ + 0x1d2c00 - 0x1d2cff: reserved \ + 0x1d2d00 - 0x1d2dff: VD2 \ + 0x1d2e00 - 0x1d3dff: VD2 (DG2 only) \ + 0x1d3e00 - 0x1d3eff: reserved \ + 0x1d3f00 - 0x1d3fff: VD2 */ \ + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), /* \ + 0x1d4000 - 0x1d6bff: VD3 \ + 0x1d6c00 - 0x1d6cff: reserved \ + 0x1d6d00 - 0x1d6dff: VD3 \ + 0x1d6e00 - 0x1d7fff: reserved */ \ + GEN_FW_RANGE(0x1d8000, 0x1dffff, FORCEWAKE_MEDIA_VEBOX1), /* \ + 0x1d8000 - 0x1da0ff: VE1 \ + 0x1da100 - 0x1dffff: reserved */ \ + GEN_FW_RANGE(0x1e0000, 0x1e3fff, FORCEWAKE_MEDIA_VDBOX4), /* \ + 0x1e0000 - 0x1e2bff: VD4 \ + 0x1e2c00 - 0x1e2cff: reserved \ + 0x1e2d00 - 0x1e2dff: VD4 \ + 0x1e2e00 - 0x1e3eff: reserved \ + 0x1e3f00 - 0x1e3fff: VD4 */ \ + GEN_FW_RANGE(0x1e4000, 0x1e7fff, FORCEWAKE_MEDIA_VDBOX5), /* \ + 0x1e4000 - 0x1e6bff: VD5 \ + 0x1e6c00 - 0x1e6cff: reserved \ + 0x1e6d00 - 0x1e6dff: VD5 \ + 0x1e6e00 - 0x1e7fff: reserved */ \ + GEN_FW_RANGE(0x1e8000, 0x1effff, FORCEWAKE_MEDIA_VEBOX2), /* \ + 0x1e8000 - 0x1ea0ff: VE2 \ + 0x1ea100 - 0x1effff: reserved */ \ + GEN_FW_RANGE(0x1f0000, 0x1f3fff, FORCEWAKE_MEDIA_VDBOX6), /* \ + 0x1f0000 - 0x1f2bff: VD6 \ + 0x1f2c00 - 0x1f2cff: reserved \ + 0x1f2d00 - 0x1f2dff: VD6 \ + 0x1f2e00 - 0x1f3eff: reserved \ + 0x1f3f00 - 0x1f3fff: VD6 */ \ + GEN_FW_RANGE(0x1f4000, 0x1f7fff, FORCEWAKE_MEDIA_VDBOX7), /* \ + 0x1f4000 - 0x1f6bff: VD7 \ + 0x1f6c00 - 0x1f6cff: reserved \ + 0x1f6d00 - 0x1f6dff: VD7 \ + 0x1f6e00 - 0x1f7fff: reserved */ \ GEN_FW_RANGE(0x1f8000, 0x1fa0ff, FORCEWAKE_MEDIA_VEBOX3), + +static const struct intel_forcewake_range __xehp_fw_ranges[] = { + XEHP_FWRANGES(FORCEWAKE_GT) +}; + +static const struct intel_forcewake_range __dg2_fw_ranges[] = { + XEHP_FWRANGES(FORCEWAKE_RENDER) }; static void @@ -2084,7 +2111,11 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) return ret; forcewake_early_sanitize(uncore, 0); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); -- cgit v1.2.3 From 
05b78d291d3854aeae5cfd19cc860910b1a8fcdc Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 29 Jul 2021 09:59:58 -0700 Subject: drm/i915/xehp: Changes to ss/eu definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe_HP no longer has "slices" in the same way that old platforms did. There are new concepts (gslices, cslices, mslices) that apply in various contexts, but for the purposes of fusing slices no longer exist and we just have one large pool of dual-subslices (DSS) to work with. Furthermore, the meaning of the DSS fuse is inverted compared to past platforms --- it now specifies which DSS are enabled rather than which ones are disabled. Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: Tvrtko Ursulin Signed-off-by: Matthew Auld Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Signed-off-by: Prasad Nallani Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-9-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 24 ++++++++++++++++++++---- drivers/gpu/drm/i915/i915_getparam.c | 6 ++++-- drivers/gpu/drm/i915/i915_reg.h | 3 +++ 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 9542c3f3822a..d2d5c47ef5c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -139,17 +139,33 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource. + * + * In addition, the concept of slice has been removed in Xe_HP. + * To be compatible with prior generations, assume a single slice + * across the entire device. Then calculate out the DSS for each + * workload type within that software slice. */ intel_sseu_set_info(sseu, 1, 6, 16); - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + /* + * As mentioned above, Xe_HP does not have the concept of a slice. + * Enable one for software backwards compatibility. 
+ */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + s_en = 0x1; + else + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); /* one bit per pair of EUs */ - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + else + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 24e18219eb50..e289397d9178 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -15,7 +15,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, struct pci_dev *pdev = to_pci_dev(dev->dev); const struct sseu_dev_info *sseu = &i915->gt.info.sseu; drm_i915_getparam_t *param = data; - int value; + int value = 0; switch (param->param) { case I915_PARAM_IRQ_ACTIVE: @@ -150,7 +150,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = sseu->subslice_mask[0]; + /* Only copy bits from the first slice */ + memcpy(&value, sseu->subslice_mask, + min(sseu->ss_stride, (u8)sizeof(value))); if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2416a5e63f2a..e7d2fd55be8c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3156,6 +3156,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define XEHP_EU_ENABLE _MMIO(0x9134) +#define XEHP_EU_ENA_MASK 0xFF + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) -- cgit v1.2.3 From eb962fae0078d6f827473e0eb6019db55d2217f1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 09:59:59 -0700 Subject: drm/i915/xehpsdv: Add maximum sseu limits Due to the removal of legacy slices and the transition to a gslice/cslice/mslice/etc. design, we'll internally store all DSS under "slice0." Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-10-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 5 ++++- drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index d2d5c47ef5c3..3a2ff0e00b65 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -145,7 +145,10 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * across the entire device. Then calculate out the DSS for each * workload type within that software slice. */ - intel_sseu_set_info(sseu, 1, 6, 16); + if (IS_XEHPSDV(gt->i915)) + intel_sseu_set_info(sseu, 1, 32, 16); + else + intel_sseu_set_info(sseu, 1, 6, 16); /* * As mentioned above, Xe_HP does not have the concept of a slice. 
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 8d85ec05f610..05a93e4e66cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -16,7 +16,7 @@ struct intel_gt; struct drm_printer; #define GEN_MAX_SLICES (3) /* SKL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ +#define GEN_MAX_SUBSLICES (32) /* XEHPSDV upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) #define GEN_MAX_EUS (16) /* TGL upper bound */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 5e7b09c5e36f..1ba8b7da9d37 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -53,7 +53,7 @@ static void cherryview_sseu_device_status(struct intel_gt *gt, static void gen11_sseu_device_status(struct intel_gt *gt, struct sseu_dev_info *sseu) { -#define SS_MAX 6 +#define SS_MAX 8 struct intel_uncore *uncore = gt->uncore; const struct intel_gt_info *info = >->info; u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; -- cgit v1.2.3 From ab49840272cfa595327fa1212a5a44287b9ac986 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 10:00:01 -0700 Subject: drm/i915/dg2: DG2 uses the same sseu limits as XeHP SDV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DG2 supports compute DSS and has the same maximum number of DSS and EU as XeHP SDV. Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-12-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 3a2ff0e00b65..a648818eafa5 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -145,7 +145,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * across the entire device. Then calculate out the DSS for each * workload type within that software slice. */ - if (IS_XEHPSDV(gt->i915)) + if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) intel_sseu_set_info(sseu, 1, 32, 16); else intel_sseu_set_info(sseu, 1, 6, 16); -- cgit v1.2.3 From ce13c78fa93ede18e341b753f21052344ea7eb33 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 3 Aug 2021 14:48:32 +0200 Subject: drm/i915: Disable gpu relocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Media userspace was the last userspace to still use them, and they converted now too: https://github.com/intel/media-driver/commit/144020c37770083974bedf59902b70b8f444c799 This means no reason anymore to make relocations faster than they've been for the first 9 years of gem. 
This code was added in commit 7dd4f6729f9243bd7046c6f04c107a456bda38eb Author: Chris Wilson Date: Fri Jun 16 15:05:24 2017 +0100 drm/i915: Async GPU relocation processing Furthermore there's pretty strong indications it's buggy, since the code to use it by default as the only option had to be reverted: commit ad5d95e4d538737ed3fa25493777decf264a3011 Author: Dave Airlie Date: Tue Sep 8 15:41:17 2020 +1000 Revert "drm/i915/gem: Async GPU relocations only" This code just disables gpu relocations, leaving the garbage collection for later patches and more importantly, much less confusing diff. Also given how much headaches this code has caused in the past, letting this soak for a bit seems justified. Acked-by: Dave Airlie Reviewed-by: Maarten Lankhorst Reviewed-by: Jason Ekstrand Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210803124833.3817354-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 43 +++++++++++--------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 6ad166fa73e7..227a870928e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1577,7 +1577,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, return true; } -static int reloc_entry_gpu(struct i915_execbuffer *eb, +static int __maybe_unused reloc_entry_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, u64 offset, u64 target_addr) @@ -1599,32 +1599,25 @@ relocate_entry(struct i915_vma *vma, { u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; - int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); - - if (reloc_gpu < 0) - return reloc_gpu; - - if (!reloc_gpu) { - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, eb, - offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_addr), - eb->reloc_cache.vaddr); - - if (wide) { - offset += sizeof(u32); - target_addr >>= 32; - wide = false; - goto repeat; - } + vaddr = reloc_vaddr(vma->obj, eb, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; } return target->node.start | UPDATE; -- cgit v1.2.3 From 8e02cceb1f1f4f254625e5338dd997ff61ab40d7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 3 Aug 2021 14:48:33 +0200 Subject: drm/i915: delete gpu reloc code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's already removed, this just garbage collects it all. 
v2: Rebase over s/GEN/GRAPHICS_VER/ v3: Also ditch eb.reloc_pool and eb.reloc_context (Maarten) Reviewed-by: Jason Ekstrand Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210803124833.3817354-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 360 +-------------------- .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 - 2 files changed, 1 insertion(+), 360 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 227a870928e3..45bc4e4ff048 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -277,16 +277,8 @@ struct i915_execbuffer { bool has_llc : 1; bool has_fence : 1; bool needs_unfenced : 1; - - struct i915_request *rq; - u32 *rq_cmd; - unsigned int rq_size; - struct intel_gt_buffer_pool_node *pool; } reloc_cache; - struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ - struct intel_context *reloc_context; - u64 invalid_flags; /** Set of execobj.flags that are invalid */ u64 batch_len; /** Length of batch within object */ @@ -1024,8 +1016,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final) static void eb_destroy(const struct i915_execbuffer *eb) { - GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -1037,14 +1027,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target->node.start); } -static void reloc_cache_clear(struct reloc_cache *cache) -{ - cache->rq = NULL; - cache->rq_cmd = NULL; - cache->pool = NULL; - cache->rq_size = 0; -} - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -1057,7 +1039,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->graphics_ver < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -1079,48 +1060,10 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - if (!cache->pool) - return; - - /* - * This is a bit nasty, normally we keep objects locked until the end - * of execbuffer, but we already submit this, and have to unlock before - * dropping the reference. Fortunately we can only hold 1 pool node at - * a time, so this should be harmless. 
- */ - i915_gem_ww_unlock_single(cache->pool->obj); - intel_gt_buffer_pool_put(cache->pool); - cache->pool = NULL; -} - -static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - struct drm_i915_gem_object *obj = cache->rq->batch->obj; - - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - intel_gt_chipset_flush(cache->rq->engine->gt); - - i915_request_add(cache->rq); - reloc_cache_put_pool(eb, cache); - reloc_cache_clear(cache); - - eb->reloc_pool = NULL; -} - static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(eb, cache); - if (!cache->vaddr) return; @@ -1302,295 +1245,6 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } -static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - int err; - - assert_vma_held(vma); - - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - obj->write_domain = 0; - - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - - return err; -} - -static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct intel_engine_cs *engine, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; - struct i915_request *rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - if (!pool) { - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, - cache->has_llc ? - I915_MAP_WB : - I915_MAP_WC); - if (IS_ERR(pool)) - return PTR_ERR(pool); - } - eb->reloc_pool = NULL; - - err = i915_gem_object_lock(pool->obj, &eb->ww); - if (err) - goto err_pool; - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err_pool; - } - intel_gt_buffer_pool_mark_used(pool); - - memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); - - batch = i915_vma_instance(pool->obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_unmap; - } - - err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto err_unmap; - - if (engine == eb->context->engine) { - rq = i915_request_create(eb->context); - } else { - struct intel_context *ce = eb->reloc_context; - - if (!ce) { - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; - } - - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); - eb->reloc_context = ce; - } - - err = intel_context_pin_ww(ce, &eb->ww); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - intel_context_unpin(ce); - } - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err) - goto err_request; - - err = reloc_move_to_gpu(rq, vma); - if (err) - goto err_request; - - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->graphics_ver > 5 ? 
0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - - assert_vma_held(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - rq->batch = batch; - i915_vma_unpin(batch); - - cache->rq = rq; - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->pool = pool; - - /* Return with batch mapping (cmd) still pinned */ - return 0; - -skip_request: - i915_request_set_error_once(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(batch); -err_unmap: - i915_gem_object_unpin_map(pool->obj); -err_pool: - eb->reloc_pool = pool; - return err; -} - -static bool reloc_can_use_engine(const struct intel_engine_cs *engine) -{ - return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6; -} - -static u32 *reloc_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(eb, cache); - - if (unlikely(!cache->rq)) { - int err; - struct intel_engine_cs *engine = eb->engine; - - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - - if (!reloc_can_use_engine(engine)) { - engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; - if (!engine) - return ERR_PTR(-ENODEV); - } - - err = __reloc_gpu_alloc(eb, engine, vma, len); - if (unlikely(err)) - return ERR_PTR(err); - } - - cmd = cache->rq_cmd + cache->rq_size; - cache->rq_size += len; - - return cmd; -} - -static inline bool use_reloc_gpu(struct i915_vma *vma) -{ - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) - return true; - - if (DBG_FORCE_RELOC) - return false; - - return !dma_resv_test_signaled(vma->resv, true); -} - -static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) -{ - struct page *page; - unsigned long addr; - - GEM_BUG_ON(vma->pages != vma->obj->mm.pages); - - page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); - addr = PFN_PHYS(page_to_pfn(page)); - GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ - - return addr + offset_in_page(offset); -} - -static int __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - const unsigned int ver = eb->reloc_cache.graphics_ver; - unsigned int len; - u32 *batch; - u64 addr; - - if (ver >= 8) - len = offset & 7 ? 
8 : 5; - else if (ver >= 4) - len = 4; - else - len = 3; - - batch = reloc_gpu(eb, vma, len); - if (batch == ERR_PTR(-EDEADLK)) - return -EDEADLK; - else if (IS_ERR(batch)) - return false; - - addr = gen8_canonical_addr(vma->node.start + offset); - if (ver >= 8) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_addr); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - *batch++ = upper_32_bits(target_addr); - } - } else if (ver >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (IS_I965G(eb->i915)) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } else if (ver >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (ver >= 3 && - !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_addr; - } else { - *batch++ = MI_STORE_DWORD_IMM; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } - - return true; -} - -static int __maybe_unused reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - if (eb->reloc_cache.vaddr) - return false; - - if (!use_reloc_gpu(vma)) - return false; - - return __reloc_entry_gpu(eb, vma, offset, target_addr); -} - static u64 relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, @@ -3153,8 +2807,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.exec = exec; eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); eb.vma[0].vma = NULL; - eb.reloc_pool = eb.batch_pool = NULL; - eb.reloc_context = NULL; + eb.batch_pool = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -3252,9 +2905,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, batch = eb.batch->vma; - /* All GPU relocation batches must be submitted prior to the user rq */ - GEM_BUG_ON(eb.reloc_cache.rq); - /* Allocate a request for this batch buffer nice and early. 
*/ eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { @@ -3345,10 +2995,6 @@ err_vma: if (eb.batch_pool) intel_gt_buffer_pool_put(eb.batch_pool); - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); err_engine: eb_put_engine(&eb); err_context: @@ -3462,7 +3108,3 @@ end:; kvfree(exec2_list); return err; } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_execbuffer.c" -#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 1746a56dda06..cfa5c4165a4f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -38,7 +38,6 @@ selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) -selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(gem_migrate, i915_gem_migrate_live_selftests) selftest(reset, intel_reset_live_selftests) -- cgit v1.2.3 From b65a9489730a2494f7a2a33a6eb0a12b8f1dd193 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Jul 2021 12:34:05 +0100 Subject: drm/i915/userptr: Probe existence of backing struct pages upon creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jason Ekstrand requested a more efficient method than userptr+set-domain to determine if the userptr object was backed by a complete set of pages upon creation. To be more efficient than simply populating the userptr using get_user_pages() (as done by the call to set-domain or execbuf), we can walk the tree of vm_area_struct and check for gaps or vma not backed by struct page (VM_PFNMAP). The question is how to handle VM_MIXEDMAP which may be either struct page or pfn backed... With discrete we are going to drop support for set_domain(), so offering a way to probe the pages, without having to resort to dummy batches has been requested. v2: - add new query param for the PROBE flag, so userspace can easily check if the kernel supports it(Jason). - use mmap_read_{lock, unlock}. - add some kernel-doc. v3: - In the docs also mention that PROBE doesn't guarantee that the pages will remain valid by the time they are actually used(Tvrtko). - Add a small comment for the hole finding logic(Jason). - Move the param next to all the other params which just return true. 
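From userspace the new flag would be exercised roughly as follows (a hedged sketch, not part of the patch; it assumes an installed uapi i915_drm.h carrying the new I915_USERPTR_PROBE and I915_PARAM_HAS_USERPTR_PROBE definitions):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Probe (and, on success, create) a userptr object for [ptr, ptr + size) */
static int create_userptr_probed(int drm_fd, void *ptr, uint64_t size, uint32_t *handle)
{
	int has_probe = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_USERPTR_PROBE,
		.value = &has_probe,
	};
	struct drm_i915_gem_userptr arg = {
		.user_ptr = (uintptr_t)ptr,
		.user_size = size,
		.flags = I915_USERPTR_PROBE,
	};

	/* Older kernels reject unknown flags, so check the param first */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_probe)
		arg.flags &= ~I915_USERPTR_PROBE;

	/* With PROBE set, the ioctl fails with EFAULT if the range has holes
	 * or is not backed by plain struct pages at this moment in time. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -1;

	*handle = arg.handle;
	return 0;
}
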
Testcase: igt/gem_userptr_blits/probe Signed-off-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Tvrtko Ursulin Acked-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Acked-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210723113405.427004-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 41 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 20 ++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 56edfeff8c02..468a7a617fbf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -422,6 +422,34 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { #endif +static int +probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) +{ + const unsigned long end = addr + len; + struct vm_area_struct *vma; + int ret = -EFAULT; + + mmap_read_lock(mm); + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + /* Check for holes, note that we also update the addr below */ + if (vma->vm_start > addr) + break; + + if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) + break; + + if (vma->vm_end >= end) { + ret = 0; + break; + } + + addr = vma->vm_end; + } + mmap_read_unlock(mm); + + return ret; +} + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. @@ -477,7 +505,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, } if (args->flags & ~(I915_USERPTR_READ_ONLY | - I915_USERPTR_UNSYNCHRONIZED)) + I915_USERPTR_UNSYNCHRONIZED | + I915_USERPTR_PROBE)) return -EINVAL; if (i915_gem_object_size_2big(args->user_size)) @@ -504,6 +533,16 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENODEV; } + if (args->flags & I915_USERPTR_PROBE) { + /* + * Check that the range pointed to represents real struct + * pages and not iomappings (at this moment in time!) + */ + ret = probe_range(current->mm, args->user_ptr, args->user_size); + if (ret) + return ret; + } + #ifdef CONFIG_MMU_NOTIFIER obj = i915_gem_object_alloc(); if (obj == NULL) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index e289397d9178..77490cb5ff9c 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -134,6 +134,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: + case I915_PARAM_HAS_USERPTR_PROBE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7f13d241417f..bde5860b3686 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -683,6 +683,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* Query if the kernel supports the I915_USERPTR_PROBE flag. 
*/ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -2231,12 +2234,29 @@ struct drm_i915_gem_userptr { * through the GTT. If the HW can't support readonly access, an error is * returned. * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * * I915_USERPTR_UNSYNCHRONIZED: * * NOT USED. Setting this flag will result in an error. */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** * @handle: Returned handle for the object. -- cgit v1.2.3 From 3ffe82d701a4ec6b2c1970609b23c6187503b0a0 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 29 Jul 2021 09:59:51 -0700 Subject: drm/i915/xehp: handle new steering options Xe_HP is more modular than its predecessors and as a consequence it has more types of replicated registers. As with l3bank regions on previous platforms, we may need to explicitly re-steer accesses to these new types of ranges at runtime if we can't find a single default steering value that satisfies the fusing of all types. v2: - Add a local 'i915' variable to reduce gt->i915 usage. (Caz) - Drop unused 'intel_gt_read_register' prototype. (Caz) v3: - Drop unnecessary comment text. (Lucas) - Drop unused register bit definition. (Lucas) Bspec: 66534 Cc: Tvrtko Ursulin Cc: Caz Yokoyama Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 40 ++++++++++++- drivers/gpu/drm/i915/gt/intel_gt_types.h | 7 +++ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 1 + drivers/gpu/drm/i915/gt/intel_sseu.c | 18 ++++++ drivers/gpu/drm/i915/gt/intel_sseu.h | 6 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 89 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_reg.h | 3 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 10 files changed, 163 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 04dd69bcf6cb..3bd1a31cd356 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -89,18 +89,40 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = { {}, }; +static u16 slicemask(struct intel_gt *gt, int count) +{ + u64 dss_mask = intel_sseu_get_subslices(>->info.sseu, 0); + + return intel_slicemask_from_dssmask(dss_mask, count); +} + int intel_gt_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; + intel_gt_init_clock_frequency(gt); intel_uc_init_mmio(>->uc); intel_sseu_info_init(gt); - if (GRAPHICS_VER(gt->i915) >= 11) { + /* + * An mslice is unavailable only if both the meml3 for the slice is + * disabled *and* all of the DSS in the slice (quadrant) are disabled. 
+ */ + if (HAS_MSLICES(i915)) + gt->info.mslice_mask = + slicemask(gt, GEN_DSS_PER_MSLICE) | + (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN12_MEML3_EN_MASK); + + if (GRAPHICS_VER(i915) >= 11 && + GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { gt->steering_table[L3BANK] = icl_l3bank_steering_table; gt->info.l3bank_mask = ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN10_L3BANK_MASK; + } else if (HAS_MSLICES(i915)) { + MISSING_CASE(INTEL_INFO(i915)->platform); } return intel_engines_init_mmio(gt); @@ -787,6 +809,22 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, *sliceid = 0; /* unused */ *subsliceid = __ffs(gt->info.l3bank_mask); break; + case MSLICE: + GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ + + *sliceid = __ffs(gt->info.mslice_mask); + *subsliceid = 0; /* unused */ + break; + case LNCF: + GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ + + /* + * An LNCF is always present if its mslice is present, so we + * can safely just steer to LNCF 0 in all cases. + */ + *sliceid = __ffs(gt->info.mslice_mask) << 1; + *subsliceid = 0; /* unused */ + break; default: MISSING_CASE(type); *sliceid = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 97a5075288d2..a81e21bf1bd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -47,9 +47,14 @@ struct intel_mmio_range { * of multicast registers. If another type of steering does not have any * overlap in valid steering targets with 'subslice' style registers, we will * need to explicitly re-steer reads of registers of the other type. + * + * Only the replication types that may need additional non-default steering + * are listed here. 
*/ enum intel_steering_type { L3BANK, + MSLICE, + LNCF, NUM_STEERING_TYPES }; @@ -184,6 +189,8 @@ struct intel_gt { /* Slice/subslice/EU info */ struct sseu_dev_info sseu; + + unsigned long mslice_mask; } info; }; diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index e3a2a2fa5f94..a74b72f50cc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -10,6 +10,7 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gt/intel_gt.h" static int init_fake_lmem_bar(struct intel_memory_region *mem) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index a648818eafa5..bbd272943c3f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -699,3 +699,21 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu, } } } + +u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice) +{ + u16 slice_mask = 0; + int i; + + WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask)); + + for (i = 0; dss_mask; i++) { + if (dss_mask & GENMASK(dss_per_slice - 1, 0)) + slice_mask |= BIT(i); + + dss_mask >>= dss_per_slice; + } + + return slice_mask; +} + diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 05a93e4e66cb..22fef98887c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -22,6 +22,10 @@ struct drm_printer; #define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) +#define GEN_DSS_PER_GSLICE 4 +#define GEN_DSS_PER_CSLICE 8 +#define GEN_DSS_PER_MSLICE 8 + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; @@ -104,4 +108,6 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p); void intel_sseu_print_topology(const struct sseu_dev_info *sseu, struct drm_printer *p); +u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice); + #endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 053fa7251cd0..29a76038fb97 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -889,12 +889,24 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } +static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, + unsigned slice, unsigned subslice) +{ + u32 mcr, mcr_mask; + + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + + drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); + + wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); +} + static void icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = &i915->gt.info.sseu; unsigned int slice, subslice; - u32 mcr, mcr_mask; GEM_BUG_ON(GRAPHICS_VER(i915) < 11); GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); @@ -919,12 +931,79 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) if (i915->gt.info.l3bank_mask & BIT(subslice)) i915->gt.steering_table[L3BANK] = NULL; - mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + __add_mcr_wa(i915, wal, slice, subslice); 
+} - drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); +__maybe_unused +static void +xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = gt->i915; + const struct sseu_dev_info *sseu = >->info.sseu; + unsigned long slice, subslice = 0, slice_mask = 0; + u64 dss_mask = 0; + u32 lncf_mask = 0; + int i; - wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); + /* + * On Xe_HP the steering increases in complexity. There are now several + * more units that require steering and we're not guaranteed to be able + * to find a common setting for all of them. These are: + * - GSLICE (fusable) + * - DSS (sub-unit within gslice; fusable) + * - L3 Bank (fusable) + * - MSLICE (fusable) + * - LNCF (sub-unit within mslice; always present if mslice is present) + * - SQIDI (always on) + * + * We'll do our default/implicit steering based on GSLICE (in the + * sliceid field) and DSS (in the subsliceid field). If we can + * find overlap between the valid MSLICE and/or LNCF values with + * a suitable GSLICE, then we can just re-use the default value and + * skip and explicit steering at runtime. + * + * We only need to look for overlap between GSLICE/MSLICE/LNCF to find + * a valid sliceid value. DSS steering is the only type of steering + * that utilizes the 'subsliceid' bits. + * + * Also note that, even though the steering domain is called "GSlice" + * and it is encoded in the register using the gslice format, the spec + * says that the combined (geometry | compute) fuse should be used to + * select the steering. + */ + + /* Find the potential gslice candidates */ + dss_mask = intel_sseu_get_subslices(sseu, 0); + slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE); + + /* + * Find the potential LNCF candidates. Either LNCF within a valid + * mslice is fine. + */ + for_each_set_bit(i, >->info.mslice_mask, GEN12_MAX_MSLICES) + lncf_mask |= (0x3 << (i * 2)); + + /* + * Are there any sliceid values that work for both GSLICE and LNCF + * steering? + */ + if (slice_mask & lncf_mask) { + slice_mask &= lncf_mask; + gt->steering_table[LNCF] = NULL; + } + + /* How about sliceid values that also work for MSLICE steering? 
*/ + if (slice_mask & gt->info.mslice_mask) { + slice_mask &= gt->info.mslice_mask; + gt->steering_table[MSLICE] = NULL; + } + + slice = __ffs(slice_mask); + subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE)); + WARN_ON(subslice > GEN_DSS_PER_GSLICE); + WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); + + __add_mcr_wa(i915, wal, slice, subslice); } static void diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65000b57ddb6..0558921663d9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1695,6 +1695,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_MSLICES(dev_priv) \ + (INTEL_INFO(dev_priv)->has_mslices) + #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index b76c2ba8451c..32358f90b920 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -975,6 +975,7 @@ static const struct intel_device_info adl_p_info = { .has_llc = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ + .has_mslices = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e7d2fd55be8c..7696f0d1c8bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3122,6 +3122,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN10_MIRROR_FUSE3 _MMIO(0x9118) #define GEN10_L3BANK_PAIR_COUNT 4 #define GEN10_L3BANK_MASK 0x0F +/* on Xe_HP the same fuses indicates mslices instead of L3 banks */ +#define GEN12_MAX_MSLICES 4 +#define GEN12_MEML3_EN_MASK 0x0F #define GEN8_EU_DISABLE0 _MMIO(0x9134) #define GEN8_EU_DIS0_S0_MASK 0xffffff diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 50ac43d4047f..f88be11a3570 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -134,6 +134,7 @@ enum intel_ppgtt_type { func(has_logical_ring_contexts); \ func(has_logical_ring_elsq); \ func(has_master_unit_irq); \ + func(has_mslices); \ func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ -- cgit v1.2.3 From 768fe28dd3dcea517d3c491cfe1b5cd768ee1334 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 09:59:52 -0700 Subject: drm/i915/xehpsdv: Define steering tables Define and initialize the MMIO ranges for which XeHP SDV requires MSLICE and LNCF steering. 
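For illustration only: a steering table of this kind is just a zero-terminated array of { start, end } pairs, so deciding whether a given register offset needs explicit re-steering is a linear walk over the table. The standalone user-space sketch below shows that idea; struct mmio_range and reg_needs_steering() are hypothetical names rather than the driver's actual interface, and the two sample ranges are copied from the XeHP SDV mslice table defined in the patch below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mmio_range {
	uint32_t start;
	uint32_t end;
};

/* Zero-terminated, mirroring the shape of the steering tables in the patch. */
static const struct mmio_range mslice_ranges[] = {
	{ 0x004000, 0x004AFF },
	{ 0x00C800, 0x00CFFF },
	{},
};

/* True if @offset falls inside any listed range, i.e. may need re-steering. */
static bool reg_needs_steering(const struct mmio_range *table, uint32_t offset)
{
	for (; table->end; table++)
		if (offset >= table->start && offset <= table->end)
			return true;

	return false;
}

int main(void)
{
	printf("0x4100 needs steering: %d\n", reg_needs_steering(mslice_ranges, 0x4100));
	return 0;
}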
Bspec: 66534 Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 19 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_workarounds.c | 11 +++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 3bd1a31cd356..93bfec40767a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -89,6 +89,20 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = { {}, }; +static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + { 0x00DD00, 0x00DDFF }, + { 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */ + {}, +}; + +static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D800, 0x00D8FF }, + {}, +}; + static u16 slicemask(struct intel_gt *gt, int count) { u64 dss_mask = intel_sseu_get_subslices(>->info.sseu, 0); @@ -115,7 +129,10 @@ int intel_gt_init_mmio(struct intel_gt *gt) (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN12_MEML3_EN_MASK); - if (GRAPHICS_VER(i915) >= 11 && + if (IS_XEHPSDV(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; + } else if (GRAPHICS_VER(i915) >= 11 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { gt->steering_table[L3BANK] = icl_l3bank_steering_table; gt->info.l3bank_mask = diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 29a76038fb97..b378ffc26413 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -934,7 +934,6 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) __add_mcr_wa(i915, wal, slice, subslice); } -__maybe_unused static void xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { @@ -1136,10 +1135,18 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) VSUNIT_CLKGATE_DIS_TGL); } +static void +xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + xehp_init_mcr(&i915->gt, wal); +} + static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_DG1(i915)) + if (IS_XEHPSDV(i915)) + xehpsdv_gt_workarounds_init(i915, wal); + else if (IS_DG1(i915)) dg1_gt_workarounds_init(i915, wal); else if (IS_TIGERLAKE(i915)) tgl_gt_workarounds_init(i915, wal); -- cgit v1.2.3 From 1705f22c86fb2654df22169c020d9d4ff193e47b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 09:59:54 -0700 Subject: drm/i915/dg2: Update steering tables DG2's replicated register ranges are almost the same as XeHP SDV with the exception of one LNCF sub-range that switches to gslice steering. We can re-use the XeHP SDV mslice steering table and just provide a DG2-specific LNCF steering table.
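The runtime selection logic is unchanged: xehp_init_mcr() still intersects the gslice candidates derived from the DSS fuses with the LNCF candidates derived from the mslice fuses (two LNCF ids per mslice). The standalone sketch below only reproduces that arithmetic with made-up fuse values; slicemask_from_dssmask() mirrors intel_slicemask_from_dssmask() from the earlier patch and is not the driver function itself.

#include <stdint.h>
#include <stdio.h>

#define DSS_PER_GSLICE	4	/* matches GEN_DSS_PER_GSLICE above */
#define MAX_MSLICES	4	/* matches GEN12_MAX_MSLICES above */

/* Collapse a DSS mask into one bit per group of dss_per_slice units. */
static uint16_t slicemask_from_dssmask(uint64_t dss_mask, int dss_per_slice)
{
	uint16_t slice_mask = 0;
	int i;

	for (i = 0; dss_mask; i++) {
		if (dss_mask & ((1ull << dss_per_slice) - 1))
			slice_mask |= 1u << i;
		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}

int main(void)
{
	uint64_t dss_mask = 0x00f3;	/* made-up fusing: DSS left in gslices 0 and 1 */
	uint16_t mslice_mask = 0x3;	/* made-up fusing: mslices 0 and 1 present */
	uint16_t gslice_cand, lncf_cand = 0, common;
	int i;

	gslice_cand = slicemask_from_dssmask(dss_mask, DSS_PER_GSLICE);

	/* Each present mslice contributes its two LNCF instances as candidates. */
	for (i = 0; i < MAX_MSLICES; i++)
		if (mslice_mask & (1u << i))
			lncf_cand |= 0x3u << (i * 2);

	common = gslice_cand & lncf_cand;
	printf("gslice=0x%x lncf=0x%x common sliceid candidates=0x%x\n",
	       gslice_cand, lncf_cand, common);
	return 0;
}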
Bspec: 66534 Cc: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 93bfec40767a..62d40c986642 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -103,6 +103,12 @@ static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { {}, }; +static const struct intel_mmio_range dg2_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + static u16 slicemask(struct intel_gt *gt, int count) { u64 dss_mask = intel_sseu_get_subslices(>->info.sseu, 0); @@ -129,7 +135,10 @@ int intel_gt_init_mmio(struct intel_gt *gt) (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN12_MEML3_EN_MASK); - if (IS_XEHPSDV(i915)) { + if (IS_DG2(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = dg2_lncf_steering_table; + } else if (IS_XEHPSDV(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; } else if (GRAPHICS_VER(i915) >= 11 && -- cgit v1.2.3 From 927dfdd09d8c03ba100ed0c8c3915f8e1d1f5556 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 09:59:55 -0700 Subject: drm/i915/dg2: Add SQIDI steering Although DG2_G10 platforms will always have all SQIDI's present and don't need steering for registers in a SQIDI MMIO range, this isn't true for DG2_G11 platforms; only SQIDI's 2 and 3 can be used on those. We handle SQIDI ranges a bit differently from other types of explicit steering. The SQIDI ranges belong to either the MCFG unit or the SF unit, both of which have their own dedicated steering registers and do not use the typical 0xFDC steering control that all other types of ranges use. Thus we only need to worry about picking a valid initial value for the MCFG and SF steering registers (0xFD0 and 0xFD8 respectively) at driver init; they won't change after we set them up so we don't need to worry about re-steering them explicitly at runtime. Given that any SQIDI value should work fine for DG2-G10 and XeHP SDV, while only values of 2 and 3 are valid for DG2-G11, we'll just initialize the MCFG and SF steering registers to a constant value of "2" for all XeHP-based platforms for simplicity --- that will work in all cases. 
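Put differently, the hardcoded "2" is just the lowest instance in the intersection of the usable SQIDI sets: all four instances on XeHP SDV and DG2-G10, only 2 and 3 on DG2-G11. A minimal sketch of that choice follows; the per-platform masks and their names are assumptions made up for the example, not anything defined in the driver.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Assumed masks of usable SQIDI instances, as described in the commit message. */
#define SQIDI_XEHPSDV	0xfu	/* instances 0-3 */
#define SQIDI_DG2_G10	0xfu	/* instances 0-3 */
#define SQIDI_DG2_G11	0xcu	/* instances 2-3 only */

int main(void)
{
	unsigned int common = SQIDI_XEHPSDV & SQIDI_DG2_G10 & SQIDI_DG2_G11;

	/* Lowest commonly valid instance; ffs() is 1-based, hence the -1. */
	printf("steer MCFG/SF accesses to SQIDI instance %d\n", ffs(common) - 1);
	return 0;
}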
Bspec: 66534 Cc: Radhakrishna Sripada Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210729170008.2836648-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 28 +++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b378ffc26413..aae609d7d85d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -889,17 +889,24 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } -static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, - unsigned slice, unsigned subslice) +static void __set_mcr_steering(struct i915_wa_list *wal, + i915_reg_t steering_reg, + unsigned int slice, unsigned int subslice) { u32 mcr, mcr_mask; mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); + wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); +} + +static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, + unsigned int slice, unsigned int subslice) +{ + drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); - wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); + __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); } static void @@ -953,7 +960,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) * - L3 Bank (fusable) * - MSLICE (fusable) * - LNCF (sub-unit within mslice; always present if mslice is present) - * - SQIDI (always on) * * We'll do our default/implicit steering based on GSLICE (in the * sliceid field) and DSS (in the subsliceid field). If we can @@ -1003,6 +1009,18 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); __add_mcr_wa(i915, wal, slice, subslice); + + /* + * SQIDI ranges are special because they use different steering + * registers than everything else we work with. On XeHP SDV and + * DG2-G10, any value in the steering registers will work fine since + * all instances are present, but DG2-G11 only has SQIDI instances at + * ID's 2 and 3, so we need to steer to one of those. For simplicity + * we'll just steer to a hardcoded "2" since that value will work + * everywhere. + */ + __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2); + __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2); } static void diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7696f0d1c8bb..2113925084b0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2695,6 +2695,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) #define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26) #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) -- cgit v1.2.3
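As a closing illustration, a wa_write_clr_set() entry on a steering selector amounts to a one-time read-modify-write: clear the field mask, or in the new field value, and never touch the register again. The standalone sketch below models only that bit manipulation; apply_clr_set() is a made-up helper, and MCR_SLICE() copies the GEN8_MCR_SLICE() encoding from the i915_reg.h hunk above purely for illustration (the Xe_HP code itself composes its value from the GEN11_MCR_* field macros).

#include <stdint.h>
#include <stdio.h>

/* Slice-field encoding copied from the GEN8_MCR_SLICE() definition above. */
#define MCR_SLICE(slice)	(((uint32_t)(slice) & 3) << 26)
#define MCR_SLICE_MASK		MCR_SLICE(3)

/* Model of the clear/set update that a wa_write_clr_set() entry performs. */
static uint32_t apply_clr_set(uint32_t old, uint32_t clr, uint32_t set)
{
	return (old & ~clr) | set;
}

int main(void)
{
	uint32_t selector = 0xdeadbeef;	/* arbitrary previous register contents */

	selector = apply_clr_set(selector, MCR_SLICE_MASK, MCR_SLICE(2));
	printf("selector = 0x%08x\n", selector);	/* slice field now reads 2 */
	return 0;
}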