summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r-- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 15
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 52
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine.h | 46
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 16
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_pool.h | 34
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 36
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool.c) | 114
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h | 37
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h) | 15
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 10
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_gt_types.h | 11
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_lrc.c | 320
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rc6.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_renderstate.c | 16
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rps.c | 5
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_rps.h | 1
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_timeline.c | 8
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_timeline.h | 5
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 28
-rw-r--r-- drivers/gpu/drm/i915/gt/mock_engine.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_context.c | 12
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 86
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_ring_submission.c | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_rps.c | 56
-rw-r--r-- drivers/gpu/drm/i915/gt/sysfs_engines.c | 94
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2
29 files changed, 689 insertions, 354 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 94e746af8926..699125928272 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -389,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
return err;
}
+/*
+ * write_pte - store a single page-table entry and flush it individually.
+ * @pte: entry to overwrite
+ * @val: encoded value to store
+ *
+ * Every store is followed by wmb(), a clflush() of the entry and a second
+ * wmb(), so each PTE is flushed on its own rather than in one batched pass.
+ */
+static __always_inline void
+write_pte(gen8_pte_t *pte, const gen8_pte_t val)
+{
+ /* Magic delays? Or can we refine these to flush all in one pass? */
+ *pte = val;
+ wmb(); /* cpu to cache */
+ clflush(pte); /* cache to memory */
+ wmb(); /* visible to all */
+}
+
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp,
@@ -405,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
- vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+ write_pte(&vaddr[gen8_pd_index(idx, 0)],
+ pte_encode | iter->dma);
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@@ -487,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
- vaddr[index++] = encode | iter->dma;
+ write_pte(&vaddr[index++], encode | iter->dma);
start += page_size;
iter->dma += page_size;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index cbedba857d43..d907d538176e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
intel_engine_add_retire(engine, tl);
}
+/*
+ * __signal_request - signal a request's fence and queue it on @signals.
+ * @rq: request that must currently have I915_FENCE_FLAG_SIGNAL set
+ * @signals: list that receives rq->signal_link
+ *
+ * Clears I915_FENCE_FLAG_SIGNAL and calls __dma_fence_signal(). Only when
+ * that call reports true is a reference taken and the request appended to
+ * @signals. The request is not unlinked from whatever list it was on; the
+ * callers in this file reset or trim that list themselves.
+ */
+static void __signal_request(struct i915_request *rq, struct list_head *signals)
+{
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ /* NOTE(review): presumably false means the fence was already signaled */
+ if (!__dma_fence_signal(&rq->fence))
+ return;
+
+ /* Hold a reference while the request sits on @signals */
+ i915_request_get(rq);
+ list_add_tail(&rq->signal_link, signals);
+}
+
static void signal_irq_work(struct irq_work *work)
{
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
@@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work)
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
+ list_splice_init(&b->signaled_requests, &signal);
+
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
GEM_BUG_ON(list_empty(&ce->signals));
@@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work *work)
list_entry(pos, typeof(*rq), signal_link);
GEM_BUG_ON(!check_signal_order(ce, rq));
-
if (!__request_completed(rq))
break;
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
- &rq->fence.flags));
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
- if (!__dma_fence_signal(&rq->fence))
- continue;
-
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
- i915_request_get(rq);
- list_add_tail(&rq->signal_link, &signal);
+ __signal_request(rq, &signal);
}
/*
@@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
+ INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
}
@@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&b->irq_lock, flags);
}
+/*
+ * intel_engine_transfer_stale_breadcrumbs - hand a context's pending
+ * signals over to the engine's irq_work.
+ * @engine: engine whose breadcrumbs own the requests
+ * @ce: context whose ce->signals list is drained
+ *
+ * Under b->irq_lock, every request on ce->signals is passed through
+ * __signal_request() onto b->signaled_requests, the context is detached
+ * from the signalers list and the irq_work is queued. Each request is
+ * asserted to belong to @engine and to have completed.
+ */
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ unsigned long flags;
+
+ spin_lock_irqsave(&b->irq_lock, flags);
+ if (!list_empty(&ce->signals)) {
+ struct i915_request *rq, *next;
+
+ /* Queue for executing the signal callbacks in the irq_work */
+ list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
+ GEM_BUG_ON(rq->engine != engine);
+ GEM_BUG_ON(!__request_completed(rq));
+
+ __signal_request(rq, &b->signaled_requests);
+ }
+
+ /* __signal_request() never unlinks from ce->signals; reset the head */
+ INIT_LIST_HEAD(&ce->signals);
+ list_del_init(&ce->signal_link);
+
+ irq_work_queue(&b->irq_work);
+ }
+ spin_unlock_irqrestore(&b->irq_lock, flags);
+}
+
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d10e52ff059f..9bf6d4989968 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -238,22 +238,35 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce);
+
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6);
- batch[1] = flags;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
+ batch[1] = flags1;
batch[2] = offset;
return batch + 6;
}
+/*
+ * Emit a PIPE_CONTROL through __gen8_emit_pipe_control() with no extra
+ * bits in the first argument slot (flags0 == 0); @flags is passed as flags1.
+ * Returns whatever the helper returns.
+ */
+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+{
+ return __gen8_emit_pipe_control(batch, 0, flags, offset);
+}
+
+/*
+ * Gen12 variant: forwards both @flags0 and @flags1 unchanged to
+ * __gen8_emit_pipe_control() and returns its result.
+ */
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+{
+ return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+}
+
static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -262,8 +275,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+ *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
+ *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
*cs++ = gtt_offset;
*cs++ = 0;
*cs++ = value;
@@ -273,6 +286,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
return cs;
}
+/*
+ * Emit a GGTT write via __gen8_emit_ggtt_write_rcs() with flags0 == 0;
+ * @flags is passed as flags1. Returns the helper's result.
+ */
+static inline u32*
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
+}
+
+/*
+ * Gen12 variant: forwards @flags0 and @flags1 unchanged to
+ * __gen8_emit_ggtt_write_rcs() and returns its result.
+ */
+static inline u32*
+gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
+{
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
+}
+
static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
@@ -332,13 +357,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
- return false;
-
- return intel_engine_has_semaphores(engine);
-}
-
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c9e46c5ced43..da5b61085257 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -31,7 +31,6 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
@@ -327,6 +326,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
+ engine->defaults = engine->props; /* never to change again */
+
engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
@@ -631,8 +632,6 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
-
/* Use the whole device by default */
engine->sseu =
intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
@@ -829,7 +828,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
intel_engine_fini_retire(engine);
- intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 446e35ac0224..d0a1078ef632 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -10,7 +10,6 @@
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
@@ -22,18 +21,11 @@ static int __engine_unpark(struct intel_wakeref *wf)
struct intel_engine_cs *engine =
container_of(wf, typeof(*engine), wakeref);
struct intel_context *ce;
- void *map;
ENGINE_TRACE(engine, "\n");
intel_gt_pm_get(engine->gt);
- /* Pin the default state for fast resets from atomic context. */
- map = NULL;
- if (engine->default_state)
- map = shmem_pin_map(engine->default_state);
- engine->pinned_default_state = map;
-
/* Discard stale context state from across idling */
ce = engine->kernel_context;
if (ce) {
@@ -43,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
struct drm_i915_gem_object *obj = ce->state->obj;
int type = i915_coherent_map_type(engine->i915);
+ void *map;
map = i915_gem_object_pin_map(obj, type);
if (!IS_ERR(map)) {
@@ -254,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
- intel_engine_pool_park(&engine->pool);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
@@ -262,12 +254,6 @@ static int __engine_park(struct intel_wakeref *wf)
if (engine->park)
engine->park(engine);
- if (engine->pinned_default_state) {
- shmem_unpin_map(engine->default_state,
- engine->pinned_default_state);
- engine->pinned_default_state = NULL;
- }
-
engine->execlists.no_priolist = false;
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
deleted file mode 100644
index 1bd89cadc3b7..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef INTEL_ENGINE_POOL_H
-#define INTEL_ENGINE_POOL_H
-
-#include "intel_engine_pool_types.h"
-#include "i915_active.h"
-#include "i915_request.h"
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
-
-static inline int
-intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
- struct i915_request *rq)
-{
- return i915_active_add_request(&node->active, rq);
-}
-
-static inline void
-intel_engine_pool_put(struct intel_engine_pool_node *node)
-{
- i915_active_release(&node->active);
-}
-
-void intel_engine_pool_init(struct intel_engine_pool *pool);
-void intel_engine_pool_park(struct intel_engine_pool *pool);
-void intel_engine_pool_fini(struct intel_engine_pool *pool);
-
-#endif /* INTEL_ENGINE_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index f760e2ef285b..2b6cdf47d428 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,7 +22,6 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
-#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
@@ -181,6 +180,11 @@ struct intel_engine_execlists {
u32 error_interrupt;
/**
+ * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
+ */
+ u32 reset_ccid;
+
+ /**
* @no_priolist: priority lists disabled
*/
bool no_priolist;
@@ -340,7 +344,6 @@ struct intel_engine_cs {
unsigned long wakeref_serial;
struct intel_wakeref wakeref;
struct file *default_state;
- void *pinned_default_state;
struct {
struct intel_ring *ring;
@@ -374,6 +377,8 @@ struct intel_engine_cs {
spinlock_t irq_lock;
struct list_head signalers;
+ struct list_head signaled_requests;
+
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
@@ -405,13 +410,6 @@ struct intel_engine_cs {
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
} pmu;
- /*
- * A pool of objects to use as shadow copies of client batch buffers
- * when the command parser is enabled. Prevents the client from
- * modifying the batch contents after software parsing.
- */
- struct intel_engine_pool pool;
-
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_wa_list ctx_wa_list;
@@ -500,10 +498,11 @@ struct intel_engine_cs {
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_TIMESLICES BIT(4)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
+#define I915_ENGINE_IS_VIRTUAL BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
unsigned int flags;
/*
@@ -568,7 +567,7 @@ struct intel_engine_cs {
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
- } props;
+ } props, defaults;
};
static inline bool
@@ -602,6 +601,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
}
+/*
+ * Report whether @engine has timeslicing: always false when
+ * CONFIG_DRM_I915_TIMESLICE_DURATION is not active, otherwise the state of
+ * I915_ENGINE_HAS_TIMESLICES in engine->flags.
+ */
static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return engine->flags & I915_ENGINE_HAS_TIMESLICES;
+}
+
+/* Report whether I915_ENGINE_NEEDS_BREADCRUMB_TASKLET is set in engine->flags. */
+static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index ee10122a511e..534e435f20bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -236,9 +236,8 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
-#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
-#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
+#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 52593edf8aa0..f069551e412f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -7,6 +7,7 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
+#include "intel_gt_buffer_pool.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@@ -28,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
+ intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
@@ -621,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
+ intel_gt_fini_buffer_pool(gt);
}
void intel_gt_driver_late_release(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 397186818305..1495054a4305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: MIT
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2014-2018 Intel Corporation
*/
@@ -8,15 +7,15 @@
#include "i915_drv.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
+#include "intel_gt_buffer_pool.h"
-static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
{
- return container_of(pool, struct intel_engine_cs, pool);
+ return container_of(pool, struct intel_gt, buffer_pool);
}
static struct list_head *
-bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
{
int n;
@@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz)
return &pool->cache_list[n];
}
-static void node_free(struct intel_engine_pool_node *node)
+static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
kfree(node);
}
+/*
+ * pool_free_work - delayed worker that reaps idle buffers from the pool.
+ * @wrk: the work_struct embedded in pool->work
+ *
+ * Walks every size bucket from its tail (oldest entry) and moves nodes whose
+ * age stamp is more than one second old (HZ jiffies) onto a private list,
+ * stopping at the first node that was used more recently. The reaped nodes
+ * are freed after pool->lock is dropped. If any bucket still holds nodes,
+ * the work re-arms itself.
+ */
+static void pool_free_work(struct work_struct *wrk)
+{
+ struct intel_gt_buffer_pool *pool =
+ container_of(wrk, typeof(*pool), work.work);
+ struct intel_gt_buffer_pool_node *node, *next;
+ unsigned long old = jiffies - HZ;
+ bool active = false;
+ LIST_HEAD(stale);
+ int n;
+
+ /* Free buffers that have not been used in the past second */
+ spin_lock_irq(&pool->lock);
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct list_head *list = &pool->cache_list[n];
+
+ /* Most recent at head; oldest at tail */
+ list_for_each_entry_safe_reverse(node, next, list, link) {
+ /* Everything from here to the head was used too recently */
+ if (!time_before(node->age, old))
+ break;
+
+ list_move(&node->link, &stale);
+ }
+ active |= !list_empty(list);
+ }
+ spin_unlock_irq(&pool->lock);
+
+ /* Release the reaped nodes outside the spinlock */
+ list_for_each_entry_safe(node, next, &stale, link)
+ node_free(node);
+
+ /* Keep polling while any bucket still caches buffers */
+ if (active)
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
+}
+
static int pool_active(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
struct dma_resv *resv = node->obj->base.resv;
int err;
@@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref)
__i915_active_call
static void pool_retire(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
- struct intel_engine_pool *pool = node->pool;
+ struct intel_gt_buffer_pool *pool = node->pool;
struct list_head *list = bucket_for_size(pool, node->obj->base.size);
unsigned long flags;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
i915_gem_object_unpin_pages(node->obj);
/* Return this object to the shrinker pool */
i915_gem_object_make_purgeable(node->obj);
spin_lock_irqsave(&pool->lock, flags);
+ node->age = jiffies;
list_add(&node->link, list);
spin_unlock_irqrestore(&pool->lock, flags);
+
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
}
-static struct intel_engine_pool_node *
-node_create(struct intel_engine_pool *pool, size_t sz)
+static struct intel_gt_buffer_pool_node *
+node_create(struct intel_gt_buffer_pool *pool, size_t sz)
{
- struct intel_engine_cs *engine = to_engine(pool);
- struct intel_engine_pool_node *node;
+ struct intel_gt *gt = to_gt(pool);
+ struct intel_gt_buffer_pool_node *node;
struct drm_i915_gem_object *obj;
node = kmalloc(sizeof(*node),
@@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
node->pool = pool;
i915_active_init(&node->active, pool_active, pool_retire);
- obj = i915_gem_object_create_internal(engine->i915, sz);
+ obj = i915_gem_object_create_internal(gt->i915, sz);
if (IS_ERR(obj)) {
i915_active_fini(&node->active);
kfree(node);
@@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return node;
}
-static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
{
- if (intel_engine_is_virtual(engine))
- engine = intel_virtual_engine_get_sibling(engine, 0);
-
- GEM_BUG_ON(!engine);
- return &engine->pool;
-}
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
-{
- struct intel_engine_pool *pool = lookup_pool(engine);
- struct intel_engine_pool_node *node;
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+ struct intel_gt_buffer_pool_node *node;
struct list_head *list;
unsigned long flags;
int ret;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
@@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
return node;
}
-void intel_engine_pool_init(struct intel_engine_pool *pool)
+void intel_gt_init_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
spin_lock_init(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
+ INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
-void intel_engine_pool_park(struct intel_engine_pool *pool)
+static void pool_free_imm(struct intel_gt_buffer_pool *pool)
{
int n;
+ spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct intel_gt_buffer_pool_node *node, *next;
struct list_head *list = &pool->cache_list[n];
- struct intel_engine_pool_node *node, *nn;
- list_for_each_entry_safe(node, nn, list, link)
+ list_for_each_entry_safe(node, next, list, link)
node_free(node);
-
INIT_LIST_HEAD(list);
}
+ spin_unlock_irq(&pool->lock);
+}
+
+/*
+ * intel_gt_flush_buffer_pool - cancel the reaper and empty the pool.
+ * @gt: GT whose buffer_pool is flushed
+ *
+ * Synchronously cancels pool->work; only when cancel_delayed_work_sync()
+ * returns true are all cached nodes freed immediately via pool_free_imm().
+ * NOTE(review): presumably true means the work was still pending - confirm
+ * against the workqueue API.
+ */
+void intel_gt_flush_buffer_pool(struct intel_gt *gt)
+{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+
+ if (cancel_delayed_work_sync(&pool->work))
+ pool_free_imm(pool);
}
-void intel_engine_pool_fini(struct intel_engine_pool *pool)
+void intel_gt_fini_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
+ intel_gt_flush_buffer_pool(gt);
+
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
new file mode 100644
index 000000000000..42cbac003e8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_GT_BUFFER_POOL_H
+#define INTEL_GT_BUFFER_POOL_H
+
+#include <linux/types.h>
+
+#include "i915_active.h"
+#include "intel_gt_buffer_pool_types.h"
+
+struct intel_gt;
+struct i915_request;
+
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
+
+/*
+ * Track @rq on the node's i915_active via i915_active_add_request() and
+ * return that call's result.
+ */
+static inline int
+intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
+ struct i915_request *rq)
+{
+ return i915_active_add_request(&node->active, rq);
+}
+
+/*
+ * Release the caller's hold on the node's i915_active.
+ * NOTE(review): presumably the final release runs the retire callback that
+ * returns the node to the pool - confirm against i915_active.
+ */
+static inline void
+intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node)
+{
+ i915_active_release(&node->active);
+}
+
+void intel_gt_init_buffer_pool(struct intel_gt *gt);
+void intel_gt_flush_buffer_pool(struct intel_gt *gt);
+void intel_gt_fini_buffer_pool(struct intel_gt *gt);
+
+#endif /* INTEL_GT_BUFFER_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e31ee361b76f..e28bdda771ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -4,26 +4,29 @@
* Copyright © 2014-2018 Intel Corporation
*/
-#ifndef INTEL_ENGINE_POOL_TYPES_H
-#define INTEL_ENGINE_POOL_TYPES_H
+#ifndef INTEL_GT_BUFFER_POOL_TYPES_H
+#define INTEL_GT_BUFFER_POOL_TYPES_H
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
#include "i915_active_types.h"
struct drm_i915_gem_object;
-struct intel_engine_pool {
+struct intel_gt_buffer_pool {
spinlock_t lock;
struct list_head cache_list[4];
+ struct delayed_work work;
};
-struct intel_engine_pool_node {
+struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
- struct intel_engine_pool *pool;
+ struct intel_gt_buffer_pool *pool;
+ unsigned long age;
};
-#endif /* INTEL_ENGINE_POOL_TYPES_H */
+#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 5097786f4375..6bdb434a442d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -150,10 +150,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (intel_gt_is_wedged(gt))
intel_gt_unset_wedged(gt);
- for_each_engine(engine, gt, id)
- if (engine->sanitize)
- engine->sanitize(engine);
-
intel_uc_sanitize(&gt->uc);
for_each_engine(engine, gt, id)
@@ -162,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
intel_uc_reset_prepare(&gt->uc);
+ for_each_engine(engine, gt, id)
+ if (engine->sanitize)
+ engine->sanitize(engine);
+
if (reset_engines(gt) || force) {
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, false);
@@ -171,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (engine->reset.finish)
engine->reset.finish(engine);
+ intel_rps_sanitize(&gt->rps);
+
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index d02ccb735e24..0cc1d6b185dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,6 +17,7 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
+#include "intel_gt_buffer_pool_types.h"
#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
@@ -97,6 +98,16 @@ struct intel_gt {
*/
struct i915_address_space *vm;
+ /*
+ * A pool of objects to use as shadow copies of client batch buffers
+ * when the command parser is enabled. Prevents the client from
+ * modifying the batch contents after software parsing.
+ *
+ * Buffers older than 1s are periodically reaped from the pool,
+ * or may be reclaimed by the shrinker before then.
+ */
+ struct intel_gt_buffer_pool buffer_pool;
+
struct i915_vma *scratch;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4311b12542fb..87e6c5bdd2dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -217,7 +217,7 @@ struct virtual_engine {
/* And finally, which physical engines this virtual engine maps onto. */
unsigned int num_siblings;
- struct intel_engine_cs *siblings[0];
+ struct intel_engine_cs *siblings[];
};
static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
@@ -429,18 +429,7 @@ static int effective_prio(const struct i915_request *rq)
if (i915_request_has_nopreempt(rq))
prio = I915_PRIORITY_UNPREEMPTABLE;
- /*
- * On unwinding the active request, we give it a priority bump
- * if it has completed waiting on any semaphore. If we know that
- * the request has already started, we can prevent an unwanted
- * preempt-to-idle cycle by taking that into account now.
- */
- if (__i915_request_has_started(rq))
- prio |= I915_PRIORITY_NOSEMAPHORE;
-
- /* Restrict mere WAIT boosts from triggering preemption */
- BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
- return prio | __NO_PREEMPTION;
+ return prio;
}
static int queue_prio(const struct intel_engine_execlists *execlists)
@@ -1271,14 +1260,11 @@ execlists_check_context(const struct intel_context *ce,
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
+ u32 *regs;
- if (engine->pinned_default_state)
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_OFFSET,
- engine->context_size - PAGE_SIZE);
+ regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
+ execlists_init_reg_state(regs, ce, engine, ce->ring, true);
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
ce->runtime.last = intel_context_get_runtime(ce);
}
@@ -1372,7 +1358,7 @@ __execlists_schedule_in(struct i915_request *rq)
ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- unsigned int tag = ffs(engine->context_tag);
+ unsigned int tag = ffs(READ_ONCE(engine->context_tag));
GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
clear_bit(tag - 1, &engine->context_tag);
@@ -1826,30 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
- struct i915_request *rq)
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
{
- struct intel_engine_cs *old = ve->siblings[0];
-
- /* All unattached (rq->engine == old) must already be completed */
-
- spin_lock(&old->breadcrumbs.irq_lock);
- if (!list_empty(&ve->context.signal_link)) {
- list_del_init(&ve->context.signal_link);
-
- /*
- * We cannot acquire the new engine->breadcrumbs.irq_lock
- * (as we are holding a breadcrumbs.irq_lock already),
- * so attach this request to the signaler on submission.
- * The queued irq_work will occur when we finally drop
- * the engine->active.lock after dequeue.
- */
- set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
-
- /* Also transfer the pending irq_work for the old breadcrumb. */
- intel_engine_signal_breadcrumbs(rq->engine);
- }
- spin_unlock(&old->breadcrumbs.irq_lock);
+ /*
+ * All the outstanding signals on ve->siblings[0] must have
+ * been completed, just pending the interrupt handler. As those
+ * signals still refer to the old sibling (via rq->engine), we must
+ * transfer those to the old irq_worker to keep our locking
+ * consistent.
+ */
+ intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
}
#define for_each_waiter(p__, rq__) \
@@ -1883,12 +1855,16 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
/* No waiter should start before its signaler */
- GEM_BUG_ON(i915_request_started(w) &&
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
+ i915_request_started(w) &&
!i915_request_completed(rq));
GEM_BUG_ON(i915_request_is_active(w));
@@ -2280,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
engine);
if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve, rq);
+ virtual_xfer_breadcrumbs(ve);
/*
* Move the bound engine to the top of the list
@@ -3494,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
if (!i915_request_timeline(rq)->has_initial_breadcrumb)
return 0;
@@ -3520,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
/* Record the updated position of the request's payload */
rq->infix = intel_ring_offset(rq, cs);
+ __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+
+ return 0;
+}
+
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+ u32 base = engine->mmio_base;
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
return 0;
}
@@ -3544,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request)
* to cancel/unwind this request now.
*/
+ if (!i915_vm_is_4lvl(request->context->vm)) {
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+ }
+
/* Unconditionally invalidate GPU caches and TLBs. */
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
@@ -3886,6 +3919,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
ring_set_paused(engine, 0);
/*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ /*
* After a reset, the HW starts writing into CSB entry [0]. We
* therefore have to set our HEAD pointer back one entry so that
* the *first* entry we check is entry 0. To complicate this further,
@@ -3898,16 +3939,15 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
- /*
- * Sometimes Icelake forgets to reset its pointers on a GPU reset.
- * Bludgeon them with a mmio update to be sure.
- */
+ invalidate_csb_entries(&execlists->csb_status[0],
+ &execlists->csb_status[reset_value]);
+
+ /* Once more for luck and our trusty paranoia */
ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
- reset_value << 8 | reset_value);
+ 0xffff << 16 | reset_value << 8 | reset_value);
ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[reset_value]);
+ GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
}
static void execlists_sanitize(struct intel_engine_cs *engine)
@@ -4074,6 +4114,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
*/
ring_set_paused(engine, 1);
intel_engine_stop_cs(engine);
+
+ engine->execlists.reset_ccid = active_ccid(engine);
}
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
@@ -4116,7 +4158,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* its request, it was still running at the time of the
* reset and will have been clobbered.
*/
- rq = execlists_active(execlists);
+ rq = active_context(engine, engine->execlists.reset_ccid);
if (!rq)
goto unwind;
@@ -4166,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* image back to the expected values to skip over the guilty request.
*/
__i915_request_reset(rq, stalled);
- if (!stalled)
- goto out_replay;
/*
* We want a simple context + ring to execute the breadcrumb update.
@@ -4177,9 +4217,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- GEM_BUG_ON(!intel_context_is_pinned(ce));
- restore_default_state(ce, engine);
-
out_replay:
ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
head, ce->ring->tail);
@@ -4545,6 +4582,42 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+ static const i915_reg_t vd[] = {
+ GEN12_VD0_AUX_NV,
+ GEN12_VD1_AUX_NV,
+ GEN12_VD2_AUX_NV,
+ GEN12_VD3_AUX_NV,
+ };
+
+ static const i915_reg_t ve[] = {
+ GEN12_VE0_AUX_NV,
+ GEN12_VE1_AUX_NV,
+ };
+
+ if (engine->class == VIDEO_DECODE_CLASS)
+ return vd[engine->instance];
+
+ if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+ return ve[engine->instance];
+
+ GEM_BUG_ON("unknown aux_inv_reg\n");
+
+ return INVALID_MMIO_REG;
+}
+
+static u32 *
+gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(inv_reg);
+ *cs++ = AUX_INV;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
static int gen12_emit_flush_render(struct i915_request *request,
u32 mode)
{
@@ -4553,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
u32 *cs;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl */
flags |= PIPE_CONTROL_DEPTH_STALL;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
@@ -4570,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -4585,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
- cs = intel_ring_begin(request, 8);
+ cs = intel_ring_begin(request, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -4605,6 +4679,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ /* hsdes: 1809175790 */
+ cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
}
@@ -4612,6 +4689,56 @@ static int gen12_emit_flush_render(struct i915_request *request,
return 0;
}
+static int gen12_emit_flush(struct i915_request *request, u32 mode)
+{
+ intel_engine_mask_t aux_inv = 0;
+ u32 cmd, *cs;
+
+ if (mode & EMIT_INVALIDATE)
+ aux_inv = request->engine->mask & ~BIT(BCS0);
+
+ cs = intel_ring_begin(request,
+ 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_FLUSH_DW + 1;
+
+ /* We always require a command barrier so that subsequent
+ * commands, such as breadcrumb interrupts, are strictly ordered
+ * wrt the contents of the write cache being flushed to memory
+ * (and thus being coherent from the CPU).
+ */
+ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+ if (mode & EMIT_INVALIDATE) {
+ cmd |= MI_INVALIDATE_TLB;
+ if (request->engine->class == VIDEO_DECODE_CLASS)
+ cmd |= MI_INVALIDATE_BSD;
+ }
+
+ *cs++ = cmd;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+ *cs++ = 0; /* upper addr */
+ *cs++ = 0; /* value */
+
+ if (aux_inv) { /* hsdes: 1809175790 */
+ struct intel_engine_cs *engine;
+ unsigned int tmp;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+ for_each_engine_masked(engine, request->engine->gt,
+ aux_inv, tmp) {
+ *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+ *cs++ = AUX_INV;
+ }
+ *cs++ = MI_NOOP;
+ }
+ intel_ring_advance(request, cs);
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -4641,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
- u32 *cs)
+gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4656,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
+ u32 addr = i915_request_active_timeline(request)->hwsp_offset;
+
+ return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+}
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
@@ -4681,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
static u32 *
@@ -4697,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
/*
@@ -4735,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4749,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
-
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *
gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write_rcs(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- /* Wa_1409600907:tgl */
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_HDC_PIPELINE_FLUSH);
+ cs = gen12_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
@@ -4801,8 +4925,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ }
}
if (INTEL_GEN(engine->i915) >= 12)
@@ -4845,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
- if (INTEL_GEN(engine->i915) >= 12)
+ if (INTEL_GEN(engine->i915) >= 12) {
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
-
+ engine->emit_flush = gen12_emit_flush;
+ }
engine->set_default_submission = intel_execlists_set_default_submission;
if (INTEL_GEN(engine->i915) < 11) {
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 1c1923ec8be7..ab675d35030d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -113,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
struct intel_uncore *uncore = rc6_to_uncore(rc6);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 rc6_mode;
/* 2b: Program RC6 thresholds.*/
if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
@@ -165,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
/* 3a: Enable RC6 */
set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
- /* WaRsUseTimeoutMode:cnl (pre-prod) */
- if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
- rc6_mode = GEN7_RC_CTL_TO_MODE;
- else
- rc6_mode = GEN6_RC_CTL_EI_MODE(1);
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN6_RC_CTL_RC6_ENABLE |
- rc6_mode;
+ GEN6_RC_CTL_EI_MODE(1);
/*
* WaRsDisableCoarsePowerGating:skl,cnl
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 708cb7808865..f59e7875cc5e 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -219,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+ if (err)
+ return err;
+
err = engine->emit_bb_start(rq,
so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
@@ -233,13 +241,7 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return err;
}
- i915_vma_lock(so->vma);
- err = i915_request_await_object(rq, so->vma->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(so->vma, rq, 0);
- i915_vma_unlock(so->vma);
-
- return err;
+ return 0;
}
void intel_renderstate_fini(struct intel_renderstate *so)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index c682355ec79e..2f59fc6df3c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1844,8 +1844,11 @@ void intel_rps_init(struct intel_rps *rps)
if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+}
- if (INTEL_GEN(i915) >= 6)
+void intel_rps_sanitize(struct intel_rps *rps)
+{
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
rps_disable_interrupts(rps);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index af07fa5b7584..8d3c9d663662 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -13,6 +13,7 @@ struct i915_request;
void intel_rps_init_early(struct intel_rps *rps);
void intel_rps_init(struct intel_rps *rps);
+void intel_rps_sanitize(struct intel_rps *rps);
void intel_rps_driver_register(struct intel_rps *rps);
void intel_rps_driver_unregister(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index e1fac1b38f27..4546284fede1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -211,9 +211,9 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
i915_active_release(&cl->active);
}
-int intel_timeline_init(struct intel_timeline *timeline,
- struct intel_gt *gt,
- struct i915_vma *hwsp)
+static int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
{
void *vaddr;
@@ -280,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timeline_fini(struct intel_timeline *timeline)
+static void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index c8e59a333182..4298b9ac7327 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -31,11 +31,6 @@
#include "i915_syncmap.h"
#include "gt/intel_timeline_types.h"
-int intel_timeline_init(struct intel_timeline *tl,
- struct intel_gt *gt,
- struct i915_vma *hwsp);
-void intel_timeline_fini(struct intel_timeline *tl);
-
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index adddc5c93b48..90a2b9e399b0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
-
/* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
/* WaPushConstantDereferenceHoldDisable:cnl */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
@@ -872,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
wa_init_mcr(i915, wal);
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
-
/* WaInPlaceDecompressionHang:cnl */
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
@@ -934,10 +917,13 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
- /* Wa_1607087056:icl */
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ }
}
static void
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 4a53ded7c2dd..b8dd3cbc8696 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -28,7 +28,6 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "mock_engine.h"
#include "selftests/mock_request.h"
@@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
ce = create_kernel_context(engine);
if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index b8ed3cbe1277..52af1cee9a94 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)
/* Opencode i915_request_add() so we can keep the timeline locked. */
__i915_request_commit(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_queue(rq, NULL);
timeout = i915_request_wait(rq, 0, HZ / 10);
@@ -154,10 +155,7 @@ static int live_context_size(void *arg)
*/
for_each_engine(engine, gt, id) {
- struct {
- struct file *state;
- void *pinned;
- } saved;
+ struct file *saved;
if (!engine->context_size)
continue;
@@ -171,8 +169,7 @@ static int live_context_size(void *arg)
* active state is sufficient, we are only checking that we
* don't use more than we planned.
*/
- saved.state = fetch_and_zero(&engine->default_state);
- saved.pinned = fetch_and_zero(&engine->pinned_default_state);
+ saved = fetch_and_zero(&engine->default_state);
/* Overlaps with the execlists redzone */
engine->context_size += I915_GTT_PAGE_SIZE;
@@ -181,8 +178,7 @@ static int live_context_size(void *arg)
engine->context_size -= I915_GTT_PAGE_SIZE;
- engine->pinned_default_state = saved.pinned;
- engine->default_state = saved.state;
+ engine->default_state = saved;
intel_engine_pm_put(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 7529df92f6a2..824f99c4cc7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -4342,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
-static void hexdump(const void *buf, size_t len)
-{
- const size_t rowsize = 8 * sizeof(u32);
- const void *prev = NULL;
- bool skip = false;
- size_t pos;
-
- for (pos = 0; pos < len; pos += rowsize) {
- char line[128];
-
- if (prev && !memcmp(prev, buf + pos, rowsize)) {
- if (!skip) {
- pr_info("*\n");
- skip = true;
- }
- continue;
- }
-
- WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
- rowsize, sizeof(u32),
- line, sizeof(line),
- false) >= sizeof(line));
- pr_info("[%04zx] %s\n", pos, line);
-
- prev = buf + pos;
- skip = false;
- }
-}
-
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
const u32 offset =
@@ -4518,10 +4489,10 @@ static int live_lrc_layout(void *arg)
if (err) {
pr_info("%s: HW register image:\n", engine->name);
- hexdump(hw, PAGE_SIZE);
+ igt_hexdump(hw, PAGE_SIZE);
pr_info("%s: SW register image:\n", engine->name);
- hexdump(lrc, PAGE_SIZE);
+ igt_hexdump(lrc, PAGE_SIZE);
}
shmem_unpin_map(engine->default_state, hw);
@@ -5206,6 +5177,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
{
struct i915_vma *batch;
u32 dw, x, *cs, *hw;
+ u32 *defaults;
batch = create_user_vma(ce->vm, SZ_64K);
if (IS_ERR(batch))
@@ -5217,9 +5189,16 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
return ERR_CAST(cs);
}
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
x = 0;
dw = 0;
- hw = ce->engine->pinned_default_state;
+ hw = defaults;
hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5250,6 +5229,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
*cs++ = MI_BATCH_BUFFER_END;
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
@@ -5360,6 +5341,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
struct i915_vma *batch;
u32 dw, *cs, *hw;
+ u32 *defaults;
batch = create_user_vma(ce->vm, SZ_64K);
if (IS_ERR(batch))
@@ -5371,8 +5353,15 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
return ERR_CAST(cs);
}
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
dw = 0;
- hw = ce->engine->pinned_default_state;
+ hw = defaults;
hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5400,6 +5389,8 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
*cs++ = MI_BATCH_BUFFER_END;
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
@@ -5467,6 +5458,7 @@ static int compare_isolation(struct intel_engine_cs *engine,
{
u32 x, dw, *hw, *lrc;
u32 *A[2], *B[2];
+ u32 *defaults;
int err = 0;
A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
@@ -5499,9 +5491,15 @@ static int compare_isolation(struct intel_engine_cs *engine,
}
lrc += LRC_STATE_OFFSET / sizeof(*hw);
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ err = -ENOMEM;
+ goto err_lrc;
+ }
+
x = 0;
dw = 0;
- hw = engine->pinned_default_state;
+ hw = defaults;
hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5541,6 +5539,8 @@ static int compare_isolation(struct intel_engine_cs *engine,
} while (dw < PAGE_SIZE / sizeof(u32) &&
(hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+ shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
i915_gem_object_unpin_map(ce->state->obj);
err_B1:
i915_gem_object_unpin_map(result[1]->obj);
@@ -5690,18 +5690,16 @@ static int live_lrc_isolation(void *arg)
continue;
intel_engine_pm_get(engine);
- if (engine->pinned_default_state) {
- for (i = 0; i < ARRAY_SIZE(poison); i++) {
- int result;
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ int result;
- result = __lrc_isolation(engine, poison[i]);
- if (result && !err)
- err = result;
+ result = __lrc_isolation(engine, poison[i]);
+ if (result && !err)
+ err = result;
- result = __lrc_isolation(engine, ~poison[i]);
- if (result && !err)
- err = result;
- }
+ result = __lrc_isolation(engine, ~poison[i]);
+ if (result && !err)
+ err = result;
}
intel_engine_pm_put(engine);
if (igt_flush_test(gt->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 9995faadd7e8..3350e7c995bc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine)
*cs++ = STACK_MAGIC;
*cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
vma->private = intel_context_create(engine); /* dummy residuals */
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index b89a7d7611f6..6275d69aa9cc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -56,6 +56,18 @@ static int cmp_u64(const void *A, const void *B)
return 0;
}
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B; /* sort() hands us pointers to two array elements */
+
+ if (*a < *b) /* order by the stored values, not by the element addresses */
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
struct i915_address_space *vm,
@@ -236,8 +248,8 @@ int live_rps_clock_interval(void *arg)
for_each_engine(engine, gt, id) {
unsigned long saved_heartbeat;
struct i915_request *rq;
- ktime_t dt;
u32 cycles;
+ u64 dt;
if (!intel_engine_can_store_dword(engine))
continue;
@@ -286,15 +298,29 @@ int live_rps_clock_interval(void *arg)
engine->name);
err = -ENODEV;
} else {
- preempt_disable();
- dt = ktime_get();
- cycles = -intel_uncore_read_fw(gt->uncore,
- GEN6_RP_CUR_UP_EI);
- udelay(1000);
- dt = ktime_sub(ktime_get(), dt);
- cycles += intel_uncore_read_fw(gt->uncore,
- GEN6_RP_CUR_UP_EI);
- preempt_enable();
+ ktime_t dt_[5];
+ u32 cycles_[5];
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ preempt_disable();
+
+ dt_[i] = ktime_get();
+ cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ udelay(1000);
+
+ dt_[i] = ktime_sub(ktime_get(), dt_[i]);
+ cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ preempt_enable();
+ }
+
+ /* Drop the min/max and take a 1:2:1 weighted average of the middle three samples of both cycle/dt; close enough to the median */
+ sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
+ cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
+ sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
+ dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
}
intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
@@ -306,14 +332,14 @@ int live_rps_clock_interval(void *arg)
if (err == 0) {
u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
u32 expected =
- intel_gt_ns_to_pm_interval(gt, ktime_to_ns(dt));
+ intel_gt_ns_to_pm_interval(gt, dt);
pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
- engine->name, cycles, time, ktime_to_ns(dt), expected,
+ engine->name, cycles, time, dt, expected,
gt->clock_frequency / 1000);
- if (10 * time < 8 * ktime_to_ns(dt) ||
- 8 * time > 10 * ktime_to_ns(dt)) {
+ if (10 * time < 8 * dt ||
+ 8 * time > 10 * dt) {
pr_err("%s: rps clock time does not match walltime!\n",
engine->name);
err = -EINVAL;
@@ -701,6 +727,7 @@ int live_rps_frequency_cs(void *arg)
err_vma:
*cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
i915_vma_put(vma);
@@ -842,6 +869,7 @@ int live_rps_frequency_srm(void *arg)
err_vma:
*cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
i915_vma_put(vma);
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 8f9b2f33dbaf..535cc1169e54 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -192,6 +192,17 @@ static struct kobj_attribute max_spin_attr =
__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
static ssize_t
+max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_def =
+__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL);
+
+static ssize_t
timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -234,6 +245,17 @@ static struct kobj_attribute timeslice_duration_attr =
__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
static ssize_t
+timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL);
+
+static ssize_t
stop_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -273,6 +295,17 @@ static struct kobj_attribute stop_timeout_attr =
__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
static ssize_t
+stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_def =
+__ATTR(stop_timeout_ms, 0444, stop_default, NULL);
+
+static ssize_t
preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -317,6 +350,18 @@ static struct kobj_attribute preempt_timeout_attr =
__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
static ssize_t
+preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL);
+
+static ssize_t
heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -359,6 +404,17 @@ heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
static struct kobj_attribute heartbeat_interval_attr =
__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
+static ssize_t
+heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_def =
+__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
+
static void kobj_engine_release(struct kobject *kobj)
{
kfree(kobj);
@@ -390,6 +446,42 @@ kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
return &ke->base;
}
+static void add_defaults(struct kobj_engine *parent)
+{
+ static const struct attribute *files[] = {
+ &max_spin_def.attr,
+ &stop_timeout_def.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+ &heartbeat_interval_def.attr,
+#endif
+ NULL
+ };
+ struct kobj_engine *ke;
+
+ ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+ if (!ke)
+ return;
+
+ kobject_init(&ke->base, &kobj_engine_type);
+ ke->engine = parent->engine;
+
+ if (kobject_add(&ke->base, &parent->base, "%s", ".defaults")) {
+ kobject_put(&ke->base);
+ return;
+ }
+
+ if (sysfs_create_files(&ke->base, files))
+ return;
+
+ if (intel_engine_has_timeslices(ke->engine) &&
+ sysfs_create_file(&ke->base, &timeslice_duration_def.attr))
+ return;
+
+ if (intel_engine_has_preempt_reset(ke->engine) &&
+ sysfs_create_file(&ke->base, &preempt_timeout_def.attr))
+ return;
+}
+
void intel_engines_add_sysfs(struct drm_i915_private *i915)
{
static const struct attribute *files[] = {
@@ -433,6 +525,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
sysfs_create_file(kobj, &preempt_timeout_attr.attr))
goto err_engine;
+ add_defaults(container_of(kobj, struct kobj_engine, base));
+
if (0) {
err_object:
kobject_put(kobj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index aa6d56e25a10..94eb63f309ce 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine,
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority | __NO_PREEMPTION;
+ return rq->sched.attr.priority;
}
static struct i915_request *schedule_in(struct i915_request *rq, int idx)