From 26594678d00f94c62f2e43162bd6d10fd0b74917 Mon Sep 17 00:00:00 2001 From: Leandro Ribeiro Date: Wed, 9 Jun 2021 20:00:38 -0300 Subject: drm/doc: document how userspace should find out CRTC index In this patch we add a section to document what userspace should do to find out the CRTC index. This is important as they may be many places in the documentation that need this, so it's better to just point to this section and avoid repetition. Signed-off-by: Leandro Ribeiro Reviewed-by: Pekka Paalanen Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210609230039.73307-2-leandro.ribeiro@collabora.com --- Documentation/gpu/drm-uapi.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 04bdc7a91d53..7e51dd40bf6e 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -457,6 +457,19 @@ Userspace API Structures .. kernel-doc:: include/uapi/drm/drm_mode.h :doc: overview +.. _crtc_index: + +CRTC index +---------- + +CRTC's have both an object ID and an index, and they are not the same thing. +The index is used in cases where a densely packed identifier for a CRTC is +needed, for instance a bitmask of CRTC's. The member possible_crtcs of struct +drm_mode_get_plane is an example. + +DRM_IOCTL_MODE_GETRESOURCES populates a structure with an array of CRTC ID's, +and the CRTC index is its position in this array. + .. kernel-doc:: include/uapi/drm/drm.h :internal: -- cgit v1.2.3 From 3e28d37146db5dd49c469bc62a93ca791067d391 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 17 Jun 2021 18:06:31 -0700 Subject: drm/i915: Move priolist to new i915_sched_engine object Introduce i915_sched_engine object which is lower level data structure that i915_scheduler / generic code can operate on without touching execlist specific structures. This allows additional submission backends to be added without breaking the layering. Currently the execlists backend uses 1 of these object per each engine (physical or virtual) but future backends like the GuC will point to less instances utilizing the reference counting. This is a bit of detour to integrating the i915 with the DRM scheduler but this object will still exist when the DRM scheduler lands in the i915. It will however look a bit different. It will encapsulate the drm_gpu_scheduler object plus and common variables (to the backends) related to scheduling. Regardless this is a step in the right direction. This patch starts the aforementioned transition by moving the priolist into the i915_sched_engine object. v3: (Jason Ekstrand) Update comment next to intel_engine_cs.virtual Add kernel doc (Checkpatch) Fix double the in commit message v4: (Daniele) Update comment message. Add comment about subclass field Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210618010638.98941-2-matthew.brost@intel.com --- Documentation/gpu/i915.rst | 5 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 ++-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 32 ++------- .../gpu/drm/i915/gt/intel_execlists_submission.c | 81 ++++++++++++---------- drivers/gpu/drm/i915/gt/mock_engine.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 19 ++--- drivers/gpu/drm/i915/i915_scheduler.c | 53 ++++++++++---- drivers/gpu/drm/i915/i915_scheduler.h | 18 +++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 47 +++++++++++++ 10 files changed, 192 insertions(+), 90 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 42ce0196930a..1d5ce5676d35 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -425,6 +425,11 @@ User Batchbuffer Execution .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c :doc: User command execution +Scheduling +---------- +.. kernel-doc:: drivers/gpu/drm/i915/i915_scheduler_types.h + :functions: i915_sched_engine + Logical Rings, Logical Ring Contexts and Execlists -------------------------------------------------- diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 04c1f5b9ce71..62a28c3c44a8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -585,9 +585,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) memset(execlists->pending, 0, sizeof(execlists->pending)); execlists->active = memset(execlists->inflight, 0, sizeof(execlists->inflight)); - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; } static void cleanup_status_page(struct intel_engine_cs *engine) @@ -714,6 +711,12 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL); + if (!engine->sched_engine) { + err = -ENOMEM; + goto err_sched_engine; + } + err = intel_engine_init_cmd_parser(engine); if (err) goto err_cmd_parser; @@ -737,6 +740,8 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; err_cmd_parser: + i915_sched_engine_put(engine->sched_engine); +err_sched_engine: intel_breadcrumbs_free(engine->breadcrumbs); err_status: cleanup_status_page(engine); @@ -967,6 +972,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) GEM_BUG_ON(!list_empty(&engine->active.requests)); tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ + i915_sched_engine_put(engine->sched_engine); intel_breadcrumbs_free(engine->breadcrumbs); intel_engine_fini_retire(engine); @@ -1253,7 +1259,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + if (!RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)) return false; /* Ring stopped? */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 47f4397095e5..b6a00dd72808 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -275,12 +275,12 @@ static int __engine_park(struct intel_wakeref *wf) intel_breadcrumbs_park(engine->breadcrumbs); /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); - engine->execlists.no_priolist = false; + engine->sched_engine->no_priolist = false; /* While gt calls i915_vma_parked(), we have to break the lock cycle */ intel_gt_pm_put_async(engine->gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e113f93b3274..e41a9c3f9269 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -59,6 +59,7 @@ struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; +struct i915_sched_engine; struct intel_gt; struct intel_ring; struct intel_uncore; @@ -152,11 +153,6 @@ struct intel_engine_execlists { */ struct timer_list preempt; - /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - /** * @ccid: identifier for contexts submitted to this engine */ @@ -191,11 +187,6 @@ struct intel_engine_execlists { */ u32 reset_ccid; - /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - /** * @submit_reg: gen-specific execlist submission register * set to the ExecList Submission Port (elsp) register pre-Gen11 and to @@ -238,23 +229,10 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. - * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. + * @virtual: Queue of requets on a virtual engine, sorted by priority. + * Each RB entry is a struct i915_priolist containing a list of requests + * of the same priority. */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; struct rb_root_cached virtual; /** @@ -332,6 +310,8 @@ struct intel_engine_cs { struct list_head hold; /* ready requests, but on hold */ } active; + struct i915_sched_engine *sched_engine; + /* keep a request in reserve for a [pm] barrier under oom */ struct i915_request *request_pool; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index fc77592d88a9..4f759559a792 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -273,11 +273,11 @@ static int effective_prio(const struct i915_request *rq) return prio; } -static int queue_prio(const struct intel_engine_execlists *execlists) +static int queue_prio(const struct i915_sched_engine *sched_engine) { struct rb_node *rb; - rb = rb_first_cached(&execlists->queue); + rb = rb_first_cached(&sched_engine->queue); if (!rb) return INT_MIN; @@ -318,7 +318,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * to preserve FIFO ordering of dependencies. */ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); - if (engine->execlists.queue_priority_hint <= last_prio) + if (engine->sched_engine->queue_priority_hint <= last_prio) return false; /* @@ -340,7 +340,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * context, it's priority would not exceed ELSP[0] aka last_prio. */ return max(virtual_prio(&engine->execlists), - queue_prio(&engine->execlists)) > last_prio; + queue_prio(engine->sched_engine)) > last_prio; } __maybe_unused static bool @@ -384,7 +384,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) prio = rq_prio(rq); pl = i915_sched_lookup_priolist(engine, prio); } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -1139,7 +1139,7 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, } /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) { + if (!RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)) { ENGINE_TRACE(engine, "timeslice required for queue\n"); return true; } @@ -1236,6 +1236,7 @@ static bool completed(const struct i915_request *rq) static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last, * const *active; @@ -1287,7 +1288,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + sched_engine->queue_priority_hint); record_preemption(execlists); /* @@ -1313,7 +1314,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(timer_expired(&execlists->timer)), last->fence.context, last->fence.seqno, rq_prio(last), - execlists->queue_priority_hint, + sched_engine->queue_priority_hint, yesno(timeslice_yield(execlists, last))); /* @@ -1384,7 +1385,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != &ve->context); - if (unlikely(rq_prio(rq) < queue_prio(execlists))) { + if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { spin_unlock(&ve->base.active.lock); break; } @@ -1405,7 +1406,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(engine != ve->siblings[0])); WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); rb = &ve->nodes[engine->id].rb; rb_erase_cached(rb, &execlists->virtual); @@ -1450,7 +1451,7 @@ unlock: break; } - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -1529,7 +1530,7 @@ unlock: } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: @@ -1551,7 +1552,7 @@ done: * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). */ - execlists->queue_priority_hint = queue_prio(execlists); + sched_engine->queue_priority_hint = queue_prio(sched_engine); spin_unlock(&engine->active.lock); /* @@ -2123,8 +2124,8 @@ static void execlists_unhold(struct intel_engine_cs *engine, */ __execlists_unhold(rq); - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); + if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { + engine->sched_engine->queue_priority_hint = rq_prio(rq); tasklet_hi_schedule(&engine->execlists.tasklet); } @@ -2455,12 +2456,12 @@ static void queue_request(struct intel_engine_cs *engine, static bool submit_queue(struct intel_engine_cs *engine, const struct i915_request *rq) { - struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_sched_engine *sched_engine = engine->sched_engine; - if (rq_prio(rq) <= execlists->queue_priority_hint) + if (rq_prio(rq) <= sched_engine->queue_priority_hint) return false; - execlists->queue_priority_hint = rq_prio(rq); + sched_engine->queue_priority_hint = rq_prio(rq); return true; } @@ -2486,7 +2487,7 @@ static void execlists_submit_request(struct i915_request *request) } else { queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); if (submit_queue(engine, request)) @@ -2969,12 +2970,13 @@ static void nop_submission_tasklet(struct tasklet_struct *t) from_tasklet(engine, t, execlists.tasklet); /* The driver is wedged; don't process any more events. */ - WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -3006,7 +3008,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -3016,7 +3018,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } @@ -3042,15 +3044,15 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } i915_request_put(rq); - ve->base.execlists.queue_priority_hint = INT_MIN; + ve->base.sched_engine->queue_priority_hint = INT_MIN; } spin_unlock(&ve->base.active.lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.callback = nop_submission_tasklet; @@ -3286,7 +3288,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests; + return &ve->base.sched_engine->default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3344,7 +3346,10 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) lrc_fini(&ve->context); intel_context_fini(&ve->context); - intel_breadcrumbs_free(ve->base.breadcrumbs); + if (ve->base.breadcrumbs) + intel_breadcrumbs_free(ve->base.breadcrumbs); + if (ve->base.sched_engine) + i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); kfree(ve->bonds); @@ -3475,7 +3480,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + mask, ve->base.sched_engine->queue_priority_hint); return mask; } @@ -3484,7 +3489,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) { struct virtual_engine * const ve = from_tasklet(ve, t, base.execlists.tasklet); - const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); + const int prio = READ_ONCE(ve->base.sched_engine->queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -3552,7 +3557,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) + if (first && prio > sibling->sched_engine->queue_priority_hint) tasklet_hi_schedule(&sibling->execlists.tasklet); unlock_engine: @@ -3588,7 +3593,7 @@ static void virtual_submit_request(struct i915_request *rq) i915_request_put(ve->request); } - ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->base.sched_engine->queue_priority_hint = rq_prio(rq); ve->request = i915_request_get(rq); GEM_BUG_ON(!list_empty(virtual_queue(ve))); @@ -3684,6 +3689,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); + ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!ve->base.sched_engine) { + err = -ENOMEM; + goto err_put; + } + ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ -3692,7 +3703,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); - ve->base.execlists.queue_priority_hint = INT_MIN; tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet); intel_context_init(&ve->context, &ve->base); @@ -3849,6 +3859,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; + const struct i915_sched_engine *sched_engine = engine->sched_engine; struct i915_request *rq, *last; unsigned long flags; unsigned int count; @@ -3873,13 +3884,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - if (execlists->queue_priority_hint != INT_MIN) + if (sched_engine->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - READ_ONCE(execlists->queue_priority_hint)); + READ_ONCE(sched_engine->queue_priority_hint)); last = NULL; count = 0; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); priolist_for_each_request(rq, p) { diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 32589c6625e1..b1fdba13e900 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -283,6 +283,7 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); + i915_sched_engine_put(engine->sched_engine); intel_breadcrumbs_free(engine->breadcrumbs); intel_context_unpin(engine->kernel_context); @@ -345,6 +346,10 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); + if (!engine->sched_engine) + return -ENOMEM; + intel_engine_init_active(engine, ENGINE_MOCK); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); @@ -352,7 +357,7 @@ int mock_engine_init(struct intel_engine_cs *engine) engine->breadcrumbs = intel_breadcrumbs_create(NULL); if (!engine->breadcrumbs) - return -ENOMEM; + goto err_schedule; ce = create_kernel_context(engine); if (IS_ERR(ce)) @@ -366,6 +371,8 @@ int mock_engine_init(struct intel_engine_cs *engine) err_breadcrumbs: intel_breadcrumbs_free(engine->breadcrumbs); +err_schedule: + i915_sched_engine_put(engine->sched_engine); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7c8ff9792f7b..5c5f33f40055 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -182,6 +182,7 @@ static void schedule_out(struct i915_request *rq) static void __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request **first = execlists->inflight; struct i915_request ** const last_port = first + execlists->port_mask; struct i915_request *last = first[0]; @@ -204,7 +205,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) * event. */ port = first; - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -224,11 +225,11 @@ static void __guc_dequeue(struct intel_engine_cs *engine) last = rq; } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: - execlists->queue_priority_hint = + sched_engine->queue_priority_hint = rb ? to_priolist(rb)->priority : INT_MIN; if (submit) { *port = schedule_in(last, port - execlists->inflight); @@ -338,7 +339,7 @@ out_unlock: static void guc_reset_cancel(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -368,7 +369,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) } /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -378,14 +379,14 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) i915_request_mark_complete(rq); } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -514,7 +515,7 @@ static void guc_submit_request(struct i915_request *rq) queue_request(engine, rq, rq_prio(rq)); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->sched_engine->queue.rb_root)); GEM_BUG_ON(list_empty(&rq->sched.link)); tasklet_hi_schedule(&engine->execlists.tasklet); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index efa638c3acc7..2c31e07883ba 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -40,7 +40,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists) +static void assert_priolists(struct i915_sched_engine * const sched_engine) { struct rb_node *rb; long last_prio; @@ -48,11 +48,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - GEM_BUG_ON(rb_first_cached(&execlists->queue) != - rb_first(&execlists->queue.rb_root)); + GEM_BUG_ON(rb_first_cached(&sched_engine->queue) != + rb_first(&sched_engine->queue.rb_root)); last_prio = INT_MAX; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); GEM_BUG_ON(p->priority > last_prio); @@ -63,21 +63,21 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; lockdep_assert_held(&engine->active.lock); - assert_priolists(execlists); + assert_priolists(sched_engine); - if (unlikely(execlists->no_priolist)) + if (unlikely(sched_engine->no_priolist)) prio = I915_PRIORITY_NORMAL; find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - parent = &execlists->queue.rb_root.rb_node; + parent = &sched_engine->queue.rb_root.rb_node; while (*parent) { rb = *parent; p = to_priolist(rb); @@ -92,7 +92,7 @@ find_priolist: } if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; + p = &sched_engine->default_priolist; } else { p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ @@ -107,7 +107,7 @@ find_priolist: * requests, so if userspace lied about their * dependencies that reordering may be visible. */ - execlists->no_priolist = true; + sched_engine->no_priolist = true; goto find_priolist; } } @@ -116,7 +116,7 @@ find_priolist: INIT_LIST_HEAD(&p->requests); rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); + rb_insert_color_cached(&p->node, &sched_engine->queue, first); return &p->requests; } @@ -184,7 +184,7 @@ static void kick_submission(struct intel_engine_cs *engine, * We only need to kick the tasklet once for the high priority * new context we add into the queue. */ - if (prio <= engine->execlists.queue_priority_hint) + if (prio <= engine->sched_engine->queue_priority_hint) return; rcu_read_lock(); @@ -208,7 +208,7 @@ static void kick_submission(struct intel_engine_cs *engine, inflight->fence.context, inflight->fence.seqno, inflight->sched.attr.priority); - engine->execlists.queue_priority_hint = prio; + engine->sched_engine->queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); @@ -489,6 +489,33 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } +void i915_sched_engine_free(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + + kfree(sched_engine); +} + +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass) +{ + struct i915_sched_engine *sched_engine; + + sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL); + if (!sched_engine) + return NULL; + + kref_init(&sched_engine->ref); + + sched_engine->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + + /* subclass is used in a follow up patch */ + + return sched_engine; +} + static void i915_global_scheduler_shrink(void) { kmem_cache_shrink(global.slab_dependencies); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 858a0938f47a..91a04e34cac5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -48,6 +48,24 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass); + +void i915_sched_engine_free(struct kref *kref); + +static inline struct i915_sched_engine * +i915_sched_engine_get(struct i915_sched_engine *sched_engine) +{ + kref_get(&sched_engine->ref); + return sched_engine; +} + +static inline void +i915_sched_engine_put(struct i915_sched_engine *sched_engine) +{ + kref_put(&sched_engine->ref, i915_sched_engine_free); +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 343ed44d5ed4..4a7c9f06b40b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -91,4 +91,51 @@ struct i915_dependency { &(rq__)->sched.signalers_list, \ signal_link) +/** + * struct i915_sched_engine - scheduler engine + * + * A schedule engine represents a submission queue with different priority + * bands. It contains all the common state (relative to the backend) to queue, + * track, and submit a request. + * + * This object at the moment is quite i915 specific but will transition into a + * container for the drm_gpu_scheduler plus a few other variables once the i915 + * is integrated with the DRM scheduler. + */ +struct i915_sched_engine { + /** + * @ref: reference count of schedule engine object + */ + struct kref ref; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; +}; + #endif /* _I915_SCHEDULER_TYPES_H_ */ -- cgit v1.2.3 From bfde26df7af4e8ea894008dfda1d7d54a834dcd4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 15 Jun 2021 17:13:02 -0700 Subject: drm/i915/doc: Include GuC ABI documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC ABI documentation is now ready to be included in i915.rst Signed-off-by: Michal Wajdeczko Signed-off-by: Matthew Brost Cc: Piotr Piórkowski Reviewed-by: Matthew Brost Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210616001302.84233-4-matthew.brost@intel.com --- Documentation/gpu/i915.rst | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 1d5ce5676d35..e6fd9608e9c6 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -523,6 +523,14 @@ GuC-based command submission .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :doc: GuC-based command submission +GuC ABI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h + HuC --- .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c -- cgit v1.2.3 From 577729533cdc4e37a8c230e404a44ad7a3ff4eda Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Jun 2021 16:00:36 +0100 Subject: drm/i915: Document the Virtual Engine uAPI A little bit of documentation covering the topics of engine discovery, context engine maps and virtual engines. It is not very detailed but supposed to be a starting point of giving a brief high level overview of general principles and intended use cases. v2: * Have the text in uapi header and link from there. v4: * Link from driver-uapi.rst. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210618150036.2507653-1-tvrtko.ursulin@linux.intel.com --- Documentation/gpu/driver-uapi.rst | 21 +++++ include/uapi/drm/i915_drm.h | 188 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 4411e6919a3d..27d0fbe33e87 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -5,4 +5,25 @@ DRM Driver uAPI drm/i915 uAPI ============= +Engine Discovery uAPI +--------------------- + +.. kernel-doc:: include/uapi/drm/i915_drm.h + :doc: Engine Discovery uAPI + +Context Engine Map uAPI +----------------------- + +.. kernel-doc:: include/uapi/drm/i915_drm.h + :doc: Context Engine Map uAPI + +Virtual Engine uAPI +------------------- + +.. kernel-doc:: include/uapi/drm/i915_drm.h + :doc: Virtual Engine uAPI + +i915_drm.h +---------- .. kernel-doc:: include/uapi/drm/i915_drm.h + :internal: diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a1cb4aa035a9..2f70c48567c0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1806,6 +1806,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1882,6 +1945,61 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ @@ -2375,6 +2493,76 @@ struct drm_i915_query_topology_info { __u8 data[]; }; +/** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straight away enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + /** * struct drm_i915_engine_info * -- cgit v1.2.3 From f587623b78ff538f5d9ef1241e58b91ea70a4daf Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 29 Jun 2021 12:35:10 -0700 Subject: drm/doc/rfc: i915 GuC submission / DRM scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add entry for i915 GuC submission / DRM scheduler integration plan. Follow up patch with details of new parallel submission uAPI to come. v2: (Daniel Vetter) - Expand explaination of why bonding isn't supported for GuC submission - CC some of the DRM scheduler maintainers - Add priority inheritance / boosting use case - Add reasoning for removing in order assumptions (Daniel Stone) - Add links to priority spec v4: (Tvrtko) - Add TODOs section (Daniel Vetter) - Pull in 1 line from following patch v5: (Checkpatch) - Fix typos Cc: Christian König Cc: Luben Tuikov Cc: Alex Deucher Cc: Steven Price Cc: Jon Bloomfield Cc: Dave Airlie Cc: Daniel Vetter Cc: Jason Ekstrand Cc: dri-devel@lists.freedesktop.org Signed-off-by: Matthew Brost Reviewed-by: Daniel Vetter Acked-by: Dave Airlie Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210629193511.124099-2-matthew.brost@intel.com --- Documentation/gpu/rfc/i915_scheduler.rst | 91 ++++++++++++++++++++++++++++++++ Documentation/gpu/rfc/index.rst | 4 ++ 2 files changed, 95 insertions(+) create mode 100644 Documentation/gpu/rfc/i915_scheduler.rst (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/rfc/i915_scheduler.rst b/Documentation/gpu/rfc/i915_scheduler.rst new file mode 100644 index 000000000000..7acd386a6b49 --- /dev/null +++ b/Documentation/gpu/rfc/i915_scheduler.rst @@ -0,0 +1,91 @@ +========================================= +I915 GuC Submission/DRM Scheduler Section +========================================= + +Upstream plan +============= +For upstream the overall plan for landing GuC submission and integrating the +i915 with the DRM scheduler is: + +* Merge basic GuC submission + * Basic submission support for all gen11+ platforms + * Not enabled by default on any current platforms but can be enabled via + modparam enable_guc + * Lots of rework will need to be done to integrate with DRM scheduler so + no need to nit pick everything in the code, it just should be + functional, no major coding style / layering errors, and not regress + execlists + * Update IGTs / selftests as needed to work with GuC submission + * Enable CI on supported platforms for a baseline + * Rework / get CI heathly for GuC submission in place as needed +* Merge new parallel submission uAPI + * Bonding uAPI completely incompatible with GuC submission, plus it has + severe design issues in general, which is why we want to retire it no + matter what + * New uAPI adds I915_CONTEXT_ENGINES_EXT_PARALLEL context setup step + which configures a slot with N contexts + * After I915_CONTEXT_ENGINES_EXT_PARALLEL a user can submit N batches to + a slot in a single execbuf IOCTL and the batches run on the GPU in + paralllel + * Initially only for GuC submission but execlists can be supported if + needed +* Convert the i915 to use the DRM scheduler + * GuC submission backend fully integrated with DRM scheduler + * All request queues removed from backend (e.g. all backpressure + handled in DRM scheduler) + * Resets / cancels hook in DRM scheduler + * Watchdog hooks into DRM scheduler + * Lots of complexity of the GuC backend can be pulled out once + integrated with DRM scheduler (e.g. state machine gets + simplier, locking gets simplier, etc...) + * Execlists backend will minimum required to hook in the DRM scheduler + * Legacy interface + * Features like timeslicing / preemption / virtual engines would + be difficult to integrate with the DRM scheduler and these + features are not required for GuC submission as the GuC does + these things for us + * ROI low on fully integrating into DRM scheduler + * Fully integrating would add lots of complexity to DRM + scheduler + * Port i915 priority inheritance / boosting feature in DRM scheduler + * Used for i915 page flip, may be useful to other DRM drivers as + well + * Will be an optional feature in the DRM scheduler + * Remove in-order completion assumptions from DRM scheduler + * Even when using the DRM scheduler the backends will handle + preemption, timeslicing, etc... so it is possible for jobs to + finish out of order + * Pull out i915 priority levels and use DRM priority levels + * Optimize DRM scheduler as needed + +TODOs for GuC submission upstream +================================= + +* Need an update to GuC firmware / i915 to enable error state capture +* Open source tool to decode GuC logs +* Public GuC spec + +New uAPI for basic GuC submission +================================= +No major changes are required to the uAPI for basic GuC submission. The only +change is a new scheduler attribute: I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP. +This attribute indicates the 2k i915 user priority levels are statically mapped +into 3 levels as follows: + +* -1k to -1 Low priority +* 0 Medium priority +* 1 to 1k High priority + +This is needed because the GuC only has 4 priority bands. The highest priority +band is reserved with the kernel. This aligns with the DRM scheduler priority +levels too. + +Spec references: +---------------- +* https://www.khronos.org/registry/EGL/extensions/IMG/EGL_IMG_context_priority.txt +* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap5.html#devsandqueues-priority +* https://spec.oneapi.com/level-zero/latest/core/api.html#ze-command-queue-priority-t + +New parallel submission uAPI +============================ +Details to come in a following patch. diff --git a/Documentation/gpu/rfc/index.rst b/Documentation/gpu/rfc/index.rst index 05670442ca1b..91e93a705230 100644 --- a/Documentation/gpu/rfc/index.rst +++ b/Documentation/gpu/rfc/index.rst @@ -19,3 +19,7 @@ host such documentation: .. toctree:: i915_gem_lmem.rst + +.. toctree:: + + i915_scheduler.rst -- cgit v1.2.3 From 0454a490bdebea78e93e325a020f9f80908b9ed6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 29 Jun 2021 12:35:11 -0700 Subject: drm/doc/rfc: i915 new parallel submission uAPI plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add entry for i915 new parallel submission uAPI plan. v2: (Daniel Vetter): - Expand logical order explaination - Add dummy header - Only allow N BBs in execbuf IOCTL - Configure parallel submission per slot not per gem context v3: (Marcin Ślusarz): - Lot's of typos / bad english fixed (Tvrtko Ursulin): - Consistent pseudo code, clean up wording in descriptions v4: (Daniel Vetter) - Drop flags - Add kernel doc - Reword a few things / fix typos (Tvrtko) - Reword a few things / fix typos v5: (Checkpatch) - Fix typos (Docs) - Fix warning Cc: Tvrtko Ursulin Cc: Tony Ye CC: Carl Zhang Cc: Daniel Vetter Cc: Jason Ekstrand Signed-off-by: Matthew Brost Acked-by: Daniel Vetter Acked-by: Tony Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210629193511.124099-3-matthew.brost@intel.com --- Documentation/gpu/rfc/i915_parallel_execbuf.h | 122 ++++++++++++++++++++++++++ Documentation/gpu/rfc/i915_scheduler.rst | 59 ++++++++++++- 2 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 Documentation/gpu/rfc/i915_parallel_execbuf.h (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/rfc/i915_parallel_execbuf.h b/Documentation/gpu/rfc/i915_parallel_execbuf.h new file mode 100644 index 000000000000..8cbe2c4e0172 --- /dev/null +++ b/Documentation/gpu/rfc/i915_parallel_execbuf.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ + +/** + * struct drm_i915_context_engines_parallel_submit - Configure engine for + * parallel submission. + * + * Setup a slot in the context engine map to allow multiple BBs to be submitted + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU + * in parallel. Multiple hardware contexts are created internally in the i915 + * run these BBs. Once a slot is configured for N BBs only N BBs can be + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how + * many BBs there are based on the slot's configuration. The N BBs are the last + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. + * + * The default placement behavior is to create implicit bonds between each + * context if each context maps to more than 1 physical engine (e.g. context is + * a virtual engine). Also we only allow contexts of same engine class and these + * contexts must be in logically contiguous order. Examples of the placement + * behavior described below. Lastly, the default is to not allow BBs to + * preempted mid BB rather insert coordinated preemption on all hardware + * contexts between each set of BBs. Flags may be added in the future to change + * both of these default behaviors. + * + * Returns -EINVAL if hardware context placement configuration is invalid or if + * the placement configuration isn't supported on the platform / submission + * interface. + * Returns -ENODEV if extension isn't supported on the platform / submission + * interface. + * + * .. code-block:: none + * + * Example 1 pseudo code: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=1, + * engines=CS[0],CS[1]) + * + * Results in the following valid placement: + * CS[0], CS[1] + * + * Example 2 pseudo code: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[2],CS[1],CS[3]) + * + * Results in the following valid placements: + * CS[0], CS[1] + * CS[2], CS[3] + * + * This can also be thought of as 2 virtual engines described by 2-D array + * in the engines the field with bonds placed between each index of the + * virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to + * CS[3]. + * VE[0] = CS[0], CS[2] + * VE[1] = CS[1], CS[3] + * + * Example 3 pseudo code: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[1],CS[1],CS[3]) + * + * Results in the following valid and invalid placements: + * CS[0], CS[1] + * CS[1], CS[3] - Not logical contiguous, return -EINVAL + */ +struct drm_i915_context_engines_parallel_submit { + /** + * @base: base user extension. + */ + struct i915_user_extension base; + + /** + * @engine_index: slot for parallel engine + */ + __u16 engine_index; + + /** + * @width: number of contexts per parallel engine + */ + __u16 width; + + /** + * @num_siblings: number of siblings per context + */ + __u16 num_siblings; + + /** + * @mbz16: reserved for future use; must be zero + */ + __u16 mbz16; + + /** + * @flags: all undefined flags must be zero, currently not defined flags + */ + __u64 flags; + + /** + * @mbz64: reserved for future use; must be zero + */ + __u64 mbz64[3]; + + /** + * @engines: 2-d array of engine instances to configure parallel engine + * + * length = width (i) * num_siblings (j) + * index = j + i * num_siblings + */ + struct i915_engine_class_instance engines[0]; + +} __packed; + diff --git a/Documentation/gpu/rfc/i915_scheduler.rst b/Documentation/gpu/rfc/i915_scheduler.rst index 7acd386a6b49..cbda75065dad 100644 --- a/Documentation/gpu/rfc/i915_scheduler.rst +++ b/Documentation/gpu/rfc/i915_scheduler.rst @@ -88,4 +88,61 @@ Spec references: New parallel submission uAPI ============================ -Details to come in a following patch. +The existing bonding uAPI is completely broken with GuC submission because +whether a submission is a single context submit or parallel submit isn't known +until execbuf time activated via the I915_SUBMIT_FENCE. To submit multiple +contexts in parallel with the GuC the context must be explicitly registered with +N contexts and all N contexts must be submitted in a single command to the GuC. +The GuC interfaces do not support dynamically changing between N contexts as the +bonding uAPI does. Hence the need for a new parallel submission interface. Also +the legacy bonding uAPI is quite confusing and not intuitive at all. Furthermore +I915_SUBMIT_FENCE is by design a future fence, so not really something we should +continue to support. + +The new parallel submission uAPI consists of 3 parts: + +* Export engines logical mapping +* A 'set_parallel' extension to configure contexts for parallel + submission +* Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL + +Export engines logical mapping +------------------------------ +Certain use cases require BBs to be placed on engine instances in logical order +(e.g. split-frame on gen11+). The logical mapping of engine instances can change +based on fusing. Rather than making UMDs be aware of fusing, simply expose the +logical mapping with the existing query engine info IOCTL. Also the GuC +submission interface currently only supports submitting multiple contexts to +engines in logical order which is a new requirement compared to execlists. +Lastly, all current platforms have at most 2 engine instances and the logical +order is the same as uAPI order. This will change on platforms with more than 2 +engine instances. + +A single bit will be added to drm_i915_engine_info.flags indicating that the +logical instance has been returned and a new field, +drm_i915_engine_info.logical_instance, returns the logical instance. + +A 'set_parallel' extension to configure contexts for parallel submission +------------------------------------------------------------------------ +The 'set_parallel' extension configures a slot for parallel submission of N BBs. +It is a setup step that must be called before using any of the contexts. See +I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE or I915_CONTEXT_ENGINES_EXT_BOND for +similar existing examples. Once a slot is configured for parallel submission the +execbuf2 IOCTL can be called submitting N BBs in a single IOCTL. Initially only +supports GuC submission. Execlists supports can be added later if needed. + +Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and +drm_i915_context_engines_parallel_submit to the uAPI to implement this +extension. + +.. kernel-doc:: Documentation/gpu/rfc/i915_parallel_execbuf.h + :functions: drm_i915_context_engines_parallel_submit + +Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL +------------------------------------------------------------------- +Contexts that have been configured with the 'set_parallel' extension can only +submit N BBs in a single execbuf2 IOCTL. The BBs are either the last N objects +in the drm_i915_gem_exec_object2 list or the first N if I915_EXEC_BATCH_FIRST is +set. The number of BBs is implicit based on the slot submitted and how it has +been configured by 'set_parallel' or other extensions. No uAPI changes are +required to the execbuf2 IOCTL. -- cgit v1.2.3 From fb786a48ac0dd63e702c05c35426f2e024f5749f Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Sat, 26 Jun 2021 10:26:55 +0100 Subject: drm/vkms: update the current status of todo list Update: - debugging issues on igt testcases - plane composition features: add primary plane improvements - suggestions of good tasks to start working on vkms Drop: - syzkaller bug report: what triggered the warning was replaced by shmem functions at https://patchwork.freedesktop.org/patch/394614/ - overlay plane: this feature was added by https://patchwork.freedesktop.org/patch/430941/ Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Link: https://patchwork.freedesktop.org/patch/msgid/20210626092655.ghmmt2yux5klrne7@smtp.gmail.com --- Documentation/gpu/vkms.rst | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/vkms.rst b/Documentation/gpu/vkms.rst index 2c9b376da5ca..941f0e7e5eef 100644 --- a/Documentation/gpu/vkms.rst +++ b/Documentation/gpu/vkms.rst @@ -98,9 +98,17 @@ with VKMS maintainers. IGT better support ------------------ -- Investigate: (1) test cases on kms_plane that are failing due to timeout on - capturing CRC; (2) when running kms_flip test cases in sequence, some - successful individual test cases are failing randomly. +Debugging: + +- kms_plane: some test cases are failing due to timeout on capturing CRC; + +- kms_flip: when running test cases in sequence, some successful individual + test cases are failing randomly; when individually, some successful test + cases display in the log the following error:: + + [drm:vkms_prepare_fb [vkms]] ERROR vmap failed: -4 + +Virtual hardware (vblank-less) mode: - VKMS already has support for vblanks simulated via hrtimers, which can be tested with kms_flip test; in some way, we can say that VKMS already mimics @@ -116,7 +124,17 @@ Add Plane Features There's lots of plane features we could add support for: -- Real overlay planes, not just cursor. +- Multiple overlay planes. [Good to get started] + +- Clearing primary plane: clear primary plane before plane composition (at the + start) for correctness of pixel blend ops. It also guarantees alpha channel + is cleared in the target buffer for stable crc. [Good to get started] + +- ARGB format on primary plane: blend the primary plane into background with + translucent alpha. + +- Support when the primary plane isn't exactly matching the output size: blend + the primary plane into the black background. - Full alpha blending on all planes. @@ -129,13 +147,8 @@ There's lots of plane features we could add support for: cursor api). For all of these, we also want to review the igt test coverage and make sure -all relevant igt testcases work on vkms. - -Prime Buffer Sharing --------------------- - -- Syzbot report - WARNING in vkms_gem_free_object: - https://syzkaller.appspot.com/bug?extid=e7ad70d406e74d8fc9d0 +all relevant igt testcases work on vkms. They are good options for internship +project. Runtime Configuration --------------------- @@ -153,7 +166,7 @@ module. Use/Test-cases: the refresh rate. The currently proposed solution is to expose vkms configuration through -configfs. All existing module options should be supported through configfs +configfs. All existing module options should be supported through configfs too. Writeback support @@ -162,6 +175,7 @@ Writeback support - The writeback and CRC capture operations share the use of composer_enabled boolean to ensure vblanks. Probably, when these operations work together, composer_enabled needs to refcounting the composer state to proper work. + [Good to get started] - Add support for cloned writeback outputs and related test cases using a cloned output in the IGT kms_writeback. -- cgit v1.2.3 From f8a9a5c2e9058bcfc3a3d5b444d10fd8f20cb29e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:20 -0500 Subject: drm/i915: Add gem/i915_gem_context.h to the docs In order to prevent kernel doc warnings, also fill out docs for any missing fields and fix those that forgot the "@". Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-16-jason@jlekstrand.net --- Documentation/gpu/i915.rst | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 43 +++++++++++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e6fd9608e9c6..204ebdaadb45 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -422,6 +422,8 @@ Batchbuffer Parsing User Batchbuffer Execution -------------------------- +.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_context_types.h + .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c :doc: User command execution diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index df76767f0c41..5f0673a2129f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -30,19 +30,39 @@ struct i915_address_space; struct intel_timeline; struct intel_ring; +/** + * struct i915_gem_engines - A set of engines + */ struct i915_gem_engines { union { + /** @link: Link in i915_gem_context::stale::engines */ struct list_head link; + + /** @rcu: RCU to use when freeing */ struct rcu_head rcu; }; + + /** @fence: Fence used for delayed destruction of engines */ struct i915_sw_fence fence; + + /** @ctx: i915_gem_context backpointer */ struct i915_gem_context *ctx; + + /** @num_engines: Number of engines in this set */ unsigned int num_engines; + + /** @engines: Array of engines */ struct intel_context *engines[]; }; +/** + * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set + */ struct i915_gem_engines_iter { + /** @idx: Index into i915_gem_engines::engines */ unsigned int idx; + + /** @engines: Engine set being iterated */ const struct i915_gem_engines *engines; }; @@ -53,10 +73,10 @@ struct i915_gem_engines_iter { * logical hardware state for a particular client. */ struct i915_gem_context { - /** i915: i915 device backpointer */ + /** @i915: i915 device backpointer */ struct drm_i915_private *i915; - /** file_priv: owning file descriptor */ + /** @file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; /** @@ -81,7 +101,9 @@ struct i915_gem_context { * CONTEXT_USER_ENGINES flag is set). */ struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ + + /** @engines_mutex: guards writes to engines */ + struct mutex engines_mutex; /** * @syncobj: Shared timeline syncobj @@ -118,7 +140,7 @@ struct i915_gem_context { */ struct pid *pid; - /** link: place with &drm_i915_private.context_list */ + /** @link: place with &drm_i915_private.context_list */ struct list_head link; /** @@ -153,11 +175,13 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; + /** @sched: scheduler parameters */ struct i915_sched_attr sched; - /** guilty_count: How many times this context has caused a GPU hang. */ + /** @guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** * @active_count: How many times this context was active during a GPU @@ -171,15 +195,17 @@ struct i915_gem_context { unsigned long hang_timestamp[2]; #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - /** remap_slice: Bitmask of cache lines that need remapping */ + /** @remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; /** - * handles_vma: rbtree to look up our context specific obj/vma for + * @handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** @lut_mutex: Locks handles_vma */ struct mutex lut_mutex; /** @@ -191,8 +217,11 @@ struct i915_gem_context { */ char name[TASK_COMM_LEN + 8]; + /** @stale: tracks stale engines to be destroyed */ struct { + /** @lock: guards engines */ spinlock_t lock; + /** @engines: list of stale engines */ struct list_head engines; } stale; }; -- cgit v1.2.3 From 1cbf731ef3a17b3dd4c22ed0c634ac126d1a4876 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 9 Jul 2021 13:38:42 +0200 Subject: drm/i915: Fix missing docbook chapters for i915 uapi. I noticed when grepping for DOC: that those were defined in the header, but not actually used. Fix it by removing all chapters and the internal annotation, so the docbook generated chapters are used. This reverts the changes to driver-uapi.rst by the referenced commit 577729533cdc ("drm/i915: Document the Virtual Engine uAPI") Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210715120842.806605-1-maarten.lankhorst@linux.intel.com Fixes: 577729533cdc ("drm/i915: Document the Virtual Engine uAPI") Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld --- Documentation/gpu/driver-uapi.rst | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 27d0fbe33e87..4411e6919a3d 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -5,25 +5,4 @@ DRM Driver uAPI drm/i915 uAPI ============= -Engine Discovery uAPI ---------------------- - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :doc: Engine Discovery uAPI - -Context Engine Map uAPI ------------------------ - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :doc: Context Engine Map uAPI - -Virtual Engine uAPI -------------------- - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :doc: Virtual Engine uAPI - -i915_drm.h ----------- .. kernel-doc:: include/uapi/drm/i915_drm.h - :internal: -- cgit v1.2.3 From c18c36dc75fefa8e1672d3ae2d565a9c7136d38d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 20 Jul 2021 16:35:44 +0200 Subject: Documentation: gpu: Mention the requirements for new properties New KMS properties come with a bunch of requirements to avoid each driver from running their own, inconsistent, set of properties, eventually leading to issues like property conflicts, inconsistencies between drivers and semantics, etc. Let's document what we expect. Acked-by: Dave Airlie Reviewed-by: Pekka Paalanen Reviewed-by: Daniel Vetter Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210720143544.571760-1-maxime@cerno.tech --- Documentation/gpu/drm-kms.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 87e5023e3f55..12e25119e563 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -463,6 +463,35 @@ KMS Properties This section of the documentation is primarily aimed at user-space developers. For the driver APIs, see the other sections. +Requirements +------------ + +KMS drivers might need to add extra properties to support new features. Each +new property introduced in a driver needs to meet a few requirements, in +addition to the one mentioned above: + +* It must be standardized, documenting: + + * The full, exact, name string; + * If the property is an enum, all the valid value name strings; + * What values are accepted, and what these values mean; + * What the property does and how it can be used; + * How the property might interact with other, existing properties. + +* It must provide a generic helper in the core code to register that + property on the object it attaches to. + +* Its content must be decoded by the core and provided in the object's + associated state structure. That includes anything drivers might want + to precompute, like struct drm_clip_rect for planes. + +* Its initial state must match the behavior prior to the property + introduction. This might be a fixed value matching what the hardware + does, or it may be inherited from the state the firmware left the + system in during boot. + +* An IGT test must be submitted where reasonable. + Property Types and Blob Property Support ---------------------------------------- -- cgit v1.2.3 From ba6cd766e0bf933611dc66fcb86f72ac80a446bc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 23 Jul 2021 10:34:57 +0200 Subject: drm/plane: Move drm_plane_enable_fb_damage_clips into core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're trying to have a fairly strict split between core functionality that defines the uapi, including the docs, and the helper functions to implement it. Move drm_plane_enable_fb_damage_clips and associated kerneldoc into drm_plane from drm_damage_helper.c to fix this. Reviewed-by: José Roberto de Souza Cc: Ville Syrjälä Cc: Gwan-gyeong Mun Cc: José Roberto de Souza Cc: Hans de Goede Signed-off-by: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210723083457.696939-3-daniel.vetter@ffwll.ch --- Documentation/gpu/drm-kms.rst | 4 +-- drivers/gpu/drm/drm_damage_helper.c | 54 ------------------------------------- drivers/gpu/drm/drm_plane.c | 54 +++++++++++++++++++++++++++++++++++++ include/drm/drm_damage_helper.h | 1 - include/drm/drm_plane.h | 3 ++- 5 files changed, 58 insertions(+), 58 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 12e25119e563..0cc21f6aaef5 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -537,8 +537,8 @@ Plane Composition Properties Damage Tracking Properties -------------------------- -.. kernel-doc:: drivers/gpu/drm/drm_damage_helper.c - :doc: overview +.. kernel-doc:: drivers/gpu/drm/drm_plane.c + :doc: damage tracking Color Management Properties --------------------------- diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c index eb69b7123af5..245959dad7bb 100644 --- a/drivers/gpu/drm/drm_damage_helper.c +++ b/drivers/gpu/drm/drm_damage_helper.c @@ -34,44 +34,6 @@ #include #include -/** - * DOC: overview - * - * FB_DAMAGE_CLIPS is an optional plane property which provides a means to - * specify a list of damage rectangles on a plane in framebuffer coordinates of - * the framebuffer attached to the plane. In current context damage is the area - * of plane framebuffer that has changed since last plane update (also called - * page-flip), irrespective of whether currently attached framebuffer is same as - * framebuffer attached during last plane update or not. - * - * FB_DAMAGE_CLIPS is a hint to kernel which could be helpful for some drivers - * to optimize internally especially for virtual devices where each framebuffer - * change needs to be transmitted over network, usb, etc. - * - * Since FB_DAMAGE_CLIPS is a hint so it is an optional property. User-space can - * ignore damage clips property and in that case driver will do a full plane - * update. In case damage clips are provided then it is guaranteed that the area - * inside damage clips will be updated to plane. For efficiency driver can do - * full update or can update more than specified in damage clips. Since driver - * is free to read more, user-space must always render the entire visible - * framebuffer. Otherwise there can be corruptions. Also, if a user-space - * provides damage clips which doesn't encompass the actual damage to - * framebuffer (since last plane update) can result in incorrect rendering. - * - * FB_DAMAGE_CLIPS is a blob property with the layout of blob data is simply an - * array of &drm_mode_rect. Unlike plane &drm_plane_state.src coordinates, - * damage clips are not in 16.16 fixed point. Similar to plane src in - * framebuffer, damage clips cannot be negative. In damage clip, x1/y1 are - * inclusive and x2/y2 are exclusive. While kernel does not error for overlapped - * damage clips, it is strongly discouraged. - * - * Drivers that are interested in damage interface for plane should enable - * FB_DAMAGE_CLIPS property by calling drm_plane_enable_fb_damage_clips(). - * Drivers implementing damage can use drm_atomic_helper_damage_iter_init() and - * drm_atomic_helper_damage_iter_next() helper iterator function to get damage - * rectangles clipped to &drm_plane_state.src. - */ - static void convert_clip_rect_to_rect(const struct drm_clip_rect *src, struct drm_mode_rect *dest, uint32_t num_clips, uint32_t src_inc) @@ -87,22 +49,6 @@ static void convert_clip_rect_to_rect(const struct drm_clip_rect *src, } } -/** - * drm_plane_enable_fb_damage_clips - Enables plane fb damage clips property. - * @plane: Plane on which to enable damage clips property. - * - * This function lets driver to enable the damage clips property on a plane. - */ -void drm_plane_enable_fb_damage_clips(struct drm_plane *plane) -{ - struct drm_device *dev = plane->dev; - struct drm_mode_config *config = &dev->mode_config; - - drm_object_attach_property(&plane->base, config->prop_fb_damage_clips, - 0); -} -EXPORT_SYMBOL(drm_plane_enable_fb_damage_clips); - /** * drm_atomic_helper_check_plane_damage - Verify plane damage on atomic_check. * @state: The driver state object. diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index f61315b61174..f5fe8255597c 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1397,6 +1397,60 @@ out: return ret; } +/** + * DOC: damage tracking + * + * FB_DAMAGE_CLIPS is an optional plane property which provides a means to + * specify a list of damage rectangles on a plane in framebuffer coordinates of + * the framebuffer attached to the plane. In current context damage is the area + * of plane framebuffer that has changed since last plane update (also called + * page-flip), irrespective of whether currently attached framebuffer is same as + * framebuffer attached during last plane update or not. + * + * FB_DAMAGE_CLIPS is a hint to kernel which could be helpful for some drivers + * to optimize internally especially for virtual devices where each framebuffer + * change needs to be transmitted over network, usb, etc. + * + * Since FB_DAMAGE_CLIPS is a hint so it is an optional property. User-space can + * ignore damage clips property and in that case driver will do a full plane + * update. In case damage clips are provided then it is guaranteed that the area + * inside damage clips will be updated to plane. For efficiency driver can do + * full update or can update more than specified in damage clips. Since driver + * is free to read more, user-space must always render the entire visible + * framebuffer. Otherwise there can be corruptions. Also, if a user-space + * provides damage clips which doesn't encompass the actual damage to + * framebuffer (since last plane update) can result in incorrect rendering. + * + * FB_DAMAGE_CLIPS is a blob property with the layout of blob data is simply an + * array of &drm_mode_rect. Unlike plane &drm_plane_state.src coordinates, + * damage clips are not in 16.16 fixed point. Similar to plane src in + * framebuffer, damage clips cannot be negative. In damage clip, x1/y1 are + * inclusive and x2/y2 are exclusive. While kernel does not error for overlapped + * damage clips, it is strongly discouraged. + * + * Drivers that are interested in damage interface for plane should enable + * FB_DAMAGE_CLIPS property by calling drm_plane_enable_fb_damage_clips(). + * Drivers implementing damage can use drm_atomic_helper_damage_iter_init() and + * drm_atomic_helper_damage_iter_next() helper iterator function to get damage + * rectangles clipped to &drm_plane_state.src. + */ + +/** + * drm_plane_enable_fb_damage_clips - Enables plane fb damage clips property. + * @plane: Plane on which to enable damage clips property. + * + * This function lets driver to enable the damage clips property on a plane. + */ +void drm_plane_enable_fb_damage_clips(struct drm_plane *plane) +{ + struct drm_device *dev = plane->dev; + struct drm_mode_config *config = &dev->mode_config; + + drm_object_attach_property(&plane->base, config->prop_fb_damage_clips, + 0); +} +EXPORT_SYMBOL(drm_plane_enable_fb_damage_clips); + /** * drm_plane_get_damage_clips_count - Returns damage clips count. * @state: Plane state. diff --git a/include/drm/drm_damage_helper.h b/include/drm/drm_damage_helper.h index 1ae8bce6a5ce..effda42cce31 100644 --- a/include/drm/drm_damage_helper.h +++ b/include/drm/drm_damage_helper.h @@ -64,7 +64,6 @@ struct drm_atomic_helper_damage_iter { bool full_update; }; -void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); void drm_atomic_helper_check_plane_damage(struct drm_atomic_state *state, struct drm_plane_state *plane_state); int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb, diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index a2684aab8372..fed97e35626f 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -897,9 +897,10 @@ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); + +void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); unsigned int drm_plane_get_damage_clips_count(const struct drm_plane_state *state); - struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state); -- cgit v1.2.3 From d793b8f732d6acbc6390be7342fb2e92b069dc7f Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Wed, 28 Jul 2021 18:27:39 +0800 Subject: drm: clarify usage of drm leases We make the following changes to the documentation of drm leases to make it easier to reason about their usage. In particular, we clarify the lifetime and locking rules of lease fields in drm_master: 1. Make it clear that &drm_device.mode_config.idr_mutex protects the lease idr and list structures for drm_master. The lessor field itself doesn't need to be protected as it doesn't change after it's set in drm_lease_create. 2. Add descriptions for the lifetime of lessors and leases. 3. Add an overview DOC: section in drm-uapi.rst that defines the terminology for drm leasing, and explains how leases work and why they're used. Signed-off-by: Desmond Cheong Zhi Xi Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210728102739.441543-1-desmondcheongzx@gmail.com --- Documentation/gpu/drm-uapi.rst | 9 ++++++ drivers/gpu/drm/drm_lease.c | 51 ++++++++++++++++++++++++++++++++ include/drm/drm_auth.h | 67 +++++++++++++++++++++++++++++++++++------- 3 files changed, 116 insertions(+), 11 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 7e51dd40bf6e..199afb503ab1 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -37,6 +37,15 @@ Primary Nodes, DRM Master and Authentication .. kernel-doc:: include/drm/drm_auth.h :internal: + +.. _drm_leasing: + +DRM Display Resource Leasing +============================ + +.. kernel-doc:: drivers/gpu/drm/drm_lease.c + :doc: drm leasing + Open-Source Userspace Requirements ================================== diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 92eac73d9001..79be797e8689 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -15,6 +15,57 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" +/** + * DOC: drm leasing + * + * DRM leases provide information about whether a DRM master may control a DRM + * mode setting object. This enables the creation of multiple DRM masters that + * manage subsets of display resources. + * + * The original DRM master of a device 'owns' the available drm resources. It + * may create additional DRM masters and 'lease' resources which it controls + * to the new DRM master. This gives the new DRM master control over the + * leased resources until the owner revokes the lease, or the new DRM master + * is closed. Some helpful terminology: + * + * - An 'owner' is a &struct drm_master that is not leasing objects from + * another &struct drm_master, and hence 'owns' the objects. The owner can be + * identified as the &struct drm_master for which &drm_master.lessor is NULL. + * + * - A 'lessor' is a &struct drm_master which is leasing objects to one or more + * other &struct drm_master. Currently, lessees are not allowed to + * create sub-leases, hence the lessor is the same as the owner. + * + * - A 'lessee' is a &struct drm_master which is leasing objects from some + * other &struct drm_master. Each lessee only leases resources from a single + * lessor recorded in &drm_master.lessor, and holds the set of objects that + * it is leasing in &drm_master.leases. + * + * - A 'lease' is a contract between the lessor and lessee that identifies + * which resources may be controlled by the lessee. All of the resources + * that are leased must be owned by or leased to the lessor, and lessors are + * not permitted to lease the same object to multiple lessees. + * + * The set of objects any &struct drm_master 'controls' is limited to the set + * of objects it leases (for lessees) or all objects (for owners). + * + * Objects not controlled by a &struct drm_master cannot be modified through + * the various state manipulating ioctls, and any state reported back to user + * space will be edited to make them appear idle and/or unusable. For + * instance, connectors always report 'disconnected', while encoders + * report no possible crtcs or clones. + * + * Since each lessee may lease objects from a single lessor, display resource + * leases form a tree of &struct drm_master. As lessees are currently not + * allowed to create sub-leases, the tree depth is limited to 1. All of + * these get activated simultaneously when the top level device owner changes + * through the SETMASTER or DROPMASTER IOCTL, so &drm_device.master points to + * the owner at the top of the lease tree (i.e. the &struct drm_master for which + * &drm_master.lessor is NULL). The full list of lessees that are leasing + * objects from the owner can be searched via the owner's + * &drm_master.lessee_idr. + */ + #define drm_for_each_lessee(lessee, lessor) \ list_for_each_entry((lessee), &(lessor)->lessees, lessee_list) diff --git a/include/drm/drm_auth.h b/include/drm/drm_auth.h index f99d3417f304..ba248ca8866f 100644 --- a/include/drm/drm_auth.h +++ b/include/drm/drm_auth.h @@ -58,12 +58,6 @@ struct drm_lock_data { * @refcount: Refcount for this master object. * @dev: Link back to the DRM device * @driver_priv: Pointer to driver-private information. - * @lessor: Lease holder - * @lessee_id: id for lessees. Owners always have id 0 - * @lessee_list: other lessees of the same master - * @lessees: drm_masters leasing from this one - * @leases: Objects leased to this drm_master. - * @lessee_idr: All lessees under this owner (only used where lessor == NULL) * * Note that master structures are only relevant for the legacy/primary device * nodes, hence there can only be one per device, not one per drm_minor. @@ -88,17 +82,68 @@ struct drm_master { struct idr magic_map; void *driver_priv; - /* Tree of display resource leases, each of which is a drm_master struct - * All of these get activated simultaneously, so drm_device master points - * at the top of the tree (for which lessor is NULL). Protected by - * &drm_device.mode_config.idr_mutex. + /** + * @lessor: + * + * Lease grantor, only set if this &struct drm_master represents a + * lessee holding a lease of objects from @lessor. Full owners of the + * device have this set to NULL. + * + * The lessor does not change once it's set in drm_lease_create(), and + * each lessee holds a reference to its lessor that it releases upon + * being destroyed in drm_lease_destroy(). + * + * See also the :ref:`section on display resource leasing + * `. */ - struct drm_master *lessor; + + /** + * @lessee_id: + * + * ID for lessees. Owners (i.e. @lessor is NULL) always have ID 0. + * Protected by &drm_device.mode_config's &drm_mode_config.idr_mutex. + */ int lessee_id; + + /** + * @lessee_list: + * + * List entry of lessees of @lessor, where they are linked to @lessees. + * Not used for owners. Protected by &drm_device.mode_config's + * &drm_mode_config.idr_mutex. + */ struct list_head lessee_list; + + /** + * @lessees: + * + * List of drm_masters leasing from this one. Protected by + * &drm_device.mode_config's &drm_mode_config.idr_mutex. + * + * This list is empty if no leases have been granted, or if all lessees + * have been destroyed. Since lessors are referenced by all their + * lessees, this master cannot be destroyed unless the list is empty. + */ struct list_head lessees; + + /** + * @leases: + * + * Objects leased to this drm_master. Protected by + * &drm_device.mode_config's &drm_mode_config.idr_mutex. + * + * Objects are leased all together in drm_lease_create(), and are + * removed all together when the lease is revoked. + */ struct idr leases; + + /** + * @lessee_idr: + * + * All lessees under this owner (only used where @lessor is NULL). + * Protected by &drm_device.mode_config's &drm_mode_config.idr_mutex. + */ struct idr lessee_idr; /* private: */ #if IS_ENABLED(CONFIG_DRM_LEGACY) -- cgit v1.2.3 From 6e5b47a4f1dde38d42b054cc6d16b6840de08bd2 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 2 Aug 2021 07:28:35 +0000 Subject: drm: document drm_mode_get_property It's not obvious what the fields mean and how they should be used. The most important detail is the link to drm_property.flags, which describes how property types work. v2: document enum drm_mode_property_enum, add ref to "Modeset Base Object Abstraction" (Daniel) Signed-off-by: Simon Ser Acked-by: Pekka Paalanen Acked-by: Daniel Vetter Cc: Leandro Ribeiro Link: https://patchwork.freedesktop.org/patch/msgid/20210802072826.500078-1-contact@emersion.fr --- Documentation/gpu/drm-kms.rst | 2 ++ include/uapi/drm/drm_mode.h | 60 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 4 deletions(-) (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 0cc21f6aaef5..1ef7951ded5e 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -159,6 +159,8 @@ KMS Core Structures and Functions .. kernel-doc:: drivers/gpu/drm/drm_mode_config.c :export: +.. _kms_base_object_abstraction: + Modeset Base Object Abstraction =============================== diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 98bf130feda5..90c55383f1ee 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -541,22 +541,74 @@ struct drm_mode_get_connector { */ #define DRM_MODE_PROP_ATOMIC 0x80000000 +/** + * struct drm_mode_property_enum - Description for an enum/bitfield entry. + * @value: numeric value for this enum entry. + * @name: symbolic name for this enum entry. + * + * See struct drm_property_enum for details. + */ struct drm_mode_property_enum { __u64 value; char name[DRM_PROP_NAME_LEN]; }; +/** + * struct drm_mode_get_property - Get property metadata. + * + * User-space can perform a GETPROPERTY ioctl to retrieve information about a + * property. The same property may be attached to multiple objects, see + * "Modeset Base Object Abstraction". + * + * The meaning of the @values_ptr field changes depending on the property type. + * See &drm_property.flags for more details. + * + * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the + * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For + * backwards compatibility, the kernel will always set @count_enum_blobs to + * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must + * ignore these two fields if the property has a different type. + * + * User-space is expected to retrieve values and enums by performing this ioctl + * at least twice: the first time to retrieve the number of elements, the + * second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_values and @count_enum_blobs + * to zero, then call the ioctl. @count_values will be updated with the number + * of elements. If the property has the type &DRM_MODE_PROP_ENUM or + * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. + * + * To retrieve the elements themselves, allocate an array for @values_ptr and + * set @count_values to its capacity. If the property has the type + * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for + * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl + * again will fill the arrays. + */ struct drm_mode_get_property { - __u64 values_ptr; /* values and blob lengths */ - __u64 enum_blob_ptr; /* enum and blob id ptrs */ + /** @values_ptr: Pointer to a ``__u64`` array. */ + __u64 values_ptr; + /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ + __u64 enum_blob_ptr; + /** + * @prop_id: Object ID of the property which should be retrieved. Set + * by the caller. + */ __u32 prop_id; + /** + * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for + * a definition of the flags. + */ __u32 flags; + /** + * @name: Symbolic property name. User-space should use this field to + * recognize properties. + */ char name[DRM_PROP_NAME_LEN]; + /** @count_values: Number of elements in @values_ptr. */ __u32 count_values; - /* This is only used to count enum values, not blobs. The _blobs is - * simply because of a historical reason, i.e. backwards compat. */ + /** @count_enum_blobs: Number of elements in @enum_blob_ptr. */ __u32 count_enum_blobs; }; -- cgit v1.2.3