summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c106
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.h5
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c181
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c305
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.h36
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h47
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c319
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c106
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c97
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c105
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c300
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h142
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c169
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c150
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c73
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c42
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c28
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c183
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c30
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c5
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c22
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c34
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c36
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c4
42 files changed, 1568 insertions, 1156 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index cdc0b9c54305..fd0d24d28763 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
const unsigned int pde,
const struct i915_page_table *pt)
{
+ dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);
+
/* Caller needs to make sure the write completes if necessary */
- iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+ iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
ppgtt->pd_addr + pde);
}
@@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
{
struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
- const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+ const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
unsigned int pde = first_entry / GEN6_PTES;
unsigned int pte = first_entry % GEN6_PTES;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
@@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
- GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
-
num_entries -= count;
GEM_BUG_ON(count > atomic_read(&pt->used));
@@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
- GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+ GEM_BUG_ON(!pd->entry[act_pt]);
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
@@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
mutex_unlock(&ppgtt->flush);
}
-static int gen6_alloc_va_range(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen6_alloc_va_range(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_table *pt, *alloc = NULL;
+ struct i915_page_table *pt;
bool flush = false;
u64 from = start;
unsigned int pde;
- int ret = 0;
spin_lock(&pd->lock);
gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
- if (px_base(pt) == px_base(&vm->scratch[1])) {
+ if (!pt) {
spin_unlock(&pd->lock);
- pt = fetch_and_zero(&alloc);
- if (!pt)
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto unwind_out;
- }
+ pt = stash->pt[0];
+ __i915_gem_object_pin_pages(pt->base);
+ i915_gem_object_make_unshrinkable(pt->base);
- fill32_px(pt, vm->scratch[0].encode);
+ fill32_px(pt, vm->scratch[0]->encode);
spin_lock(&pd->lock);
- if (pd->entry[pde] == &vm->scratch[1]) {
+ if (!pd->entry[pde]) {
+ stash->pt[0] = pt->stash;
+ atomic_set(&pt->used, 0);
pd->entry[pde] = pt;
} else {
- alloc = pt;
pt = pd->entry[pde];
}
@@ -226,38 +223,32 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
gen6_flush_pd(ppgtt, from, start);
}
-
- goto out;
-
-unwind_out:
- gen6_ppgtt_clear_range(vm, from, start - from);
-out:
- if (alloc)
- free_px(vm, alloc);
- return ret;
}
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
struct i915_address_space * const vm = &ppgtt->base.vm;
- struct i915_page_directory * const pd = ppgtt->base.pd;
int ret;
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ ret = setup_scratch_page(vm);
if (ret)
return ret;
- vm->scratch[0].encode =
- vm->pte_encode(px_dma(&vm->scratch[0]),
+ vm->scratch[0]->encode =
+ vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, PTE_READ_ONLY);
- if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
- cleanup_scratch_page(vm);
- return -ENOMEM;
+ vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(vm->scratch[1]))
+ return PTR_ERR(vm->scratch[1]);
+
+ ret = pin_pt_dma(vm, vm->scratch[1]);
+ if (ret) {
+ i915_gem_object_put(vm->scratch[1]);
+ return ret;
}
- fill32_px(&vm->scratch[1], vm->scratch[0].encode);
- memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+ fill32_px(vm->scratch[1], vm->scratch[0]->encode);
return 0;
}
@@ -265,14 +256,12 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch =
- px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
u32 pde;
gen6_for_all_pdes(pt, pd, pde)
- if (px_base(pt) != scratch)
- free_px(&ppgtt->base.vm, pt);
+ if (pt)
+ free_pt(&ppgtt->base.vm, pt);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -286,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
mutex_destroy(&ppgtt->flush);
mutex_destroy(&ppgtt->pin_mutex);
- kfree(ppgtt->base.pd);
+
+ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
static int pd_vma_set_pages(struct i915_vma *vma)
@@ -302,28 +292,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
-static int pd_vma_bind(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 unused)
+static void pd_vma_bind(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct gen6_ppgtt *ppgtt = vma->private;
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
- px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+ ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
- return 0;
}
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
struct gen6_ppgtt *ppgtt = vma->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch =
- px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
unsigned int pde;
@@ -332,11 +320,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
/* Free all no longer used page tables */
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
- if (px_base(pt) == scratch || atomic_read(&pt->used))
+ if (!pt || atomic_read(&pt->used))
continue;
- free_px(&ppgtt->base.vm, pt);
- pd->entry[pde] = scratch;
+ free_pt(&ppgtt->base.vm, pt);
+ pd->entry[pde] = NULL;
}
ppgtt->scan_for_unused_pt = false;
@@ -380,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
return vma;
}
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
int err;
@@ -406,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
*/
err = 0;
if (!atomic_read(&ppgtt->pin_count))
- err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
if (!err)
atomic_inc(&ppgtt->pin_count);
mutex_unlock(&ppgtt->pin_mutex);
@@ -448,6 +436,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
mutex_init(&ppgtt->pin_mutex);
ppgtt_init(&ppgtt->base, gt);
+ ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
@@ -456,9 +445,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+ ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
- ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+ ppgtt->base.pd = __alloc_pd(I915_PDES);
if (!ppgtt->base.pd) {
err = -ENOMEM;
goto err_free;
@@ -479,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
err_scratch:
free_scratch(&ppgtt->base.vm);
err_pd:
- kfree(ppgtt->base.pd);
+ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
mutex_destroy(&ppgtt->pin_mutex);
kfree(ppgtt);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
index 72e481806c96..3357228f3304 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -8,12 +8,15 @@
#include "intel_gtt.h"
+struct i915_gem_ww_ctx;
+
struct gen6_ppgtt {
struct i915_ppgtt base;
struct mutex flush;
struct i915_vma *vma;
gen6_pte_t __iomem *pd_addr;
+ u32 pp_dir;
atomic_t pin_count;
struct mutex pin_mutex;
@@ -66,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
(pt = i915_pt_entry(pd, iter), true); \
++iter)
-int gen6_ppgtt_pin(struct i915_ppgtt *base);
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
void gen6_ppgtt_enable(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 699125928272..eb64f474a78c 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
} while (pde++, --count);
}
- free_px(vm, pd);
+ free_px(vm, &pd->pt, lvl);
}
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -199,7 +199,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
struct i915_page_directory * const pd,
u64 start, const u64 end, int lvl)
{
- const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
unsigned int idx, len;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@@ -239,7 +239,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
vaddr = kmap_atomic_px(pt);
memset64(vaddr + gen8_pd_index(start, 0),
- vm->scratch[0].encode,
+ vm->scratch[0]->encode,
count);
kunmap_atomic(vaddr);
@@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
}
if (release_pd_entry(pd, idx, pt, scratch))
- free_px(vm, pt);
+ free_px(vm, pt, lvl);
} while (idx++, --len);
return start;
@@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
start, start + length, vm->top);
}
-static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
- struct i915_page_directory * const pd,
- u64 * const start, const u64 end, int lvl)
+static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_page_directory * const pd,
+ u64 * const start, const u64 end, int lvl)
{
- const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
- struct i915_page_table *alloc = NULL;
unsigned int idx, len;
- int ret = 0;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@@ -297,49 +295,31 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
__func__, vm, lvl + 1, idx);
- pt = fetch_and_zero(&alloc);
- if (lvl) {
- if (!pt) {
- pt = &alloc_pd(vm)->pt;
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto out;
- }
- }
-
- fill_px(pt, vm->scratch[lvl].encode);
- } else {
- if (!pt) {
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto out;
- }
- }
-
- if (intel_vgpu_active(vm->i915) ||
- gen8_pt_count(*start, end) < I915_PDES)
- fill_px(pt, vm->scratch[lvl].encode);
- }
+ pt = stash->pt[!!lvl];
+ __i915_gem_object_pin_pages(pt->base);
+ i915_gem_object_make_unshrinkable(pt->base);
+
+ if (lvl ||
+ gen8_pt_count(*start, end) < I915_PDES ||
+ intel_vgpu_active(vm->i915))
+ fill_px(pt, vm->scratch[lvl]->encode);
spin_lock(&pd->lock);
- if (likely(!pd->entry[idx]))
+ if (likely(!pd->entry[idx])) {
+ stash->pt[!!lvl] = pt->stash;
+ atomic_set(&pt->used, 0);
set_pd_entry(pd, idx, pt);
- else
- alloc = pt, pt = pd->entry[idx];
+ } else {
+ pt = pd->entry[idx];
+ }
}
if (lvl) {
atomic_inc(&pt->used);
spin_unlock(&pd->lock);
- ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
- start, end, lvl);
- if (unlikely(ret)) {
- if (release_pd_entry(pd, idx, pt, scratch))
- free_px(vm, pt);
- goto out;
- }
+ __gen8_ppgtt_alloc(vm, stash,
+ as_pd(pt), start, end, lvl);
spin_lock(&pd->lock);
atomic_dec(&pt->used);
@@ -359,18 +339,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
}
} while (idx++, --len);
spin_unlock(&pd->lock);
-out:
- if (alloc)
- free_px(vm, alloc);
- return ret;
}
-static int gen8_ppgtt_alloc(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen8_ppgtt_alloc(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length)
{
- u64 from;
- int err;
-
GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(range_overflows(start, length, vm->total));
@@ -378,25 +352,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
start >>= GEN8_PTE_SHIFT;
length >>= GEN8_PTE_SHIFT;
GEM_BUG_ON(length == 0);
- from = start;
- err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
- &start, start + length, vm->top);
- if (unlikely(err && from != start))
- __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
- from, start, vm->top);
-
- return err;
-}
-
-static __always_inline void
-write_pte(gen8_pte_t *pte, const gen8_pte_t val)
-{
- /* Magic delays? Or can we refine these to flush all in one pass? */
- *pte = val;
- wmb(); /* cpu to cache */
- clflush(pte); /* cache to memory */
- wmb(); /* visible to all */
+ __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
+ &start, start + length, vm->top);
}
static __always_inline u64
@@ -415,8 +373,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
- write_pte(&vaddr[gen8_pd_index(idx, 0)],
- pte_encode | iter->dma);
+ vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@@ -439,10 +396,12 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = pdp->entry[gen8_pd_index(idx, 2)];
}
+ clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
}
} while (1);
+ clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
return idx;
@@ -498,7 +457,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
- write_pte(&vaddr[index++], encode | iter->dma);
+ vaddr[index++] = encode | iter->dma;
start += page_size;
iter->dma += page_size;
@@ -523,6 +482,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
} while (rem >= page_size && index < I915_PDES);
+ clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
/*
@@ -554,7 +514,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
u16 i;
- encode = vma->vm->scratch[0].encode;
+ encode = vma->vm->scratch[0]->encode;
vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
for (i = 1; i < index; i += 16)
@@ -608,27 +568,37 @@ static int gen8_init_scratch(struct i915_address_space *vm)
GEM_BUG_ON(!clone->has_read_only);
vm->scratch_order = clone->scratch_order;
- memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
- px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
+ for (i = 0; i <= vm->top; i++)
+ vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);
+
return 0;
}
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ ret = setup_scratch_page(vm);
if (ret)
return ret;
- vm->scratch[0].encode =
- gen8_pte_encode(px_dma(&vm->scratch[0]),
+ vm->scratch[0]->encode =
+ gen8_pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_LLC, vm->has_read_only);
for (i = 1; i <= vm->top; i++) {
- if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+ struct drm_i915_gem_object *obj;
+
+ obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(obj))
+ goto free_scratch;
+
+ ret = pin_pt_dma(vm, obj);
+ if (ret) {
+ i915_gem_object_put(obj);
goto free_scratch;
+ }
- fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
- vm->scratch[i].encode =
- gen8_pde_encode(px_dma(&vm->scratch[i]),
- I915_CACHE_LLC);
+ fill_px(obj, vm->scratch[i - 1]->encode);
+ obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
+
+ vm->scratch[i] = obj;
}
return 0;
@@ -649,12 +619,20 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
struct i915_page_directory *pde;
+ int err;
pde = alloc_pd(vm);
if (IS_ERR(pde))
return PTR_ERR(pde);
- fill_px(pde, vm->scratch[1].encode);
+ err = pin_pt_dma(vm, pde->pt.base);
+ if (err) {
+ i915_gem_object_put(pde->pt.base);
+ free_pd(vm, pde);
+ return err;
+ }
+
+ fill_px(pde, vm->scratch[1]->encode);
set_pd_entry(pd, idx, pde);
atomic_inc(px_used(pde)); /* keep pinned */
}
@@ -668,21 +646,32 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
{
const unsigned int count = gen8_pd_top_count(vm);
struct i915_page_directory *pd;
+ int err;
- GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+ GEM_BUG_ON(count > I915_PDES);
- pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+ pd = __alloc_pd(count);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_page_dma(vm, px_base(pd)))) {
- kfree(pd);
- return ERR_PTR(-ENOMEM);
+ pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(pd->pt.base)) {
+ err = PTR_ERR(pd->pt.base);
+ pd->pt.base = NULL;
+ goto err_pd;
}
- fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+ err = pin_pt_dma(vm, pd->pt.base);
+ if (err)
+ goto err_pd;
+
+ fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
atomic_inc(px_used(pd)); /* mark as pinned */
return pd;
+
+err_pd:
+ free_pd(vm, pd);
+ return ERR_PTR(err);
}
/*
@@ -703,6 +692,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt_init(ppgtt, gt);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+ ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
/*
* From bdw, there is hw support for read-only pages in the PPGTT.
@@ -714,12 +704,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
*/
ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
- /*
- * There are only few exceptions for gen >=6. chv and bxt.
- * And we are not sure about the latter so play safe for now.
- */
- if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
- ppgtt->vm.pt_kmap_wc = true;
+ ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
err = gen8_init_scratch(&ppgtt->vm);
if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 91786310c114..d8b206e53660 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,6 +28,8 @@
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_breadcrumbs.h"
+#include "intel_context.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@@ -53,33 +55,65 @@ static void irq_disable(struct intel_engine_cs *engine)
spin_unlock(&engine->gt->irq_lock);
}
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
+ lockdep_assert_held(&b->irq_lock);
+
+ if (!b->irq_engine || b->irq_armed)
+ return;
+
+ if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+ return;
+
+ /*
+ * The breadcrumb irq will be disarmed on the interrupt after the
+ * waiters are signaled. This gives us a single interrupt window in
+ * which we can add a new waiter and avoid the cost of re-enabling
+ * the irq.
+ */
+ WRITE_ONCE(b->irq_armed, true);
+
+ /*
+ * Since we are waiting on a request, the GPU should be busy
+ * and should have its own rpm reference. This is tracked
+ * by i915->gt.awake, we can forgo holding our own wakref
+ * for the interrupt as before i915->gt.awake is released (when
+ * the driver is idle) we disarm the breadcrumbs.
+ */
+ if (!b->irq_enabled++)
+ irq_enable(b->irq_engine);
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
lockdep_assert_held(&b->irq_lock);
+ if (!b->irq_engine || !b->irq_armed)
+ return;
+
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
- irq_disable(engine);
+ irq_disable(b->irq_engine);
WRITE_ONCE(b->irq_armed, false);
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async(b->irq_engine->gt);
}
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+static void add_signaling_context(struct intel_breadcrumbs *b,
+ struct intel_context *ce)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- unsigned long flags;
-
- if (!READ_ONCE(b->irq_armed))
- return;
+ intel_context_get(ce);
+ list_add_tail(&ce->signal_link, &b->signalers);
+ if (list_is_first(&ce->signal_link, &b->signalers))
+ __intel_breadcrumbs_arm_irq(b);
+}
- spin_lock_irqsave(&b->irq_lock, flags);
- if (b->irq_armed)
- __intel_breadcrumbs_disarm_irq(b);
- spin_unlock_irqrestore(&b->irq_lock, flags);
+static void remove_signaling_context(struct intel_breadcrumbs *b,
+ struct intel_context *ce)
+{
+ list_del(&ce->signal_link);
+ intel_context_put(ce);
}
static inline bool __request_completed(const struct i915_request *rq)
@@ -90,6 +124,9 @@ static inline bool __request_completed(const struct i915_request *rq)
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
+ if (rq->context != ce)
+ return false;
+
if (!list_is_last(&rq->signal_link, &ce->signals) &&
i915_seqno_passed(rq->fence.seqno,
list_next_entry(rq, signal_link)->fence.seqno))
@@ -133,25 +170,21 @@ __dma_fence_signal__notify(struct dma_fence *fence,
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
-
- if (unlikely(intel_engine_is_virtual(engine)))
- engine = intel_virtual_engine_get_sibling(engine, 0);
-
- intel_engine_add_retire(engine, tl);
+ if (b->irq_engine)
+ intel_engine_add_retire(b->irq_engine, tl);
}
-static void __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq, struct list_head *signals)
{
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- if (!__dma_fence_signal(&rq->fence))
- return;
+ if (!__dma_fence_signal(&rq->fence)) {
+ i915_request_put(rq);
+ return false;
+ }
- i915_request_get(rq);
list_add_tail(&rq->signal_link, signals);
+ return true;
}
static void signal_irq_work(struct irq_work *work)
@@ -164,7 +197,7 @@ static void signal_irq_work(struct irq_work *work)
spin_lock(&b->irq_lock);
- if (b->irq_armed && list_empty(&b->signalers))
+ if (list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
list_splice_init(&b->signaled_requests, &signal);
@@ -197,8 +230,8 @@ static void signal_irq_work(struct irq_work *work)
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
if (&ce->signals == pos) { /* now empty */
- list_del_init(&ce->signal_link);
add_retire(b, ce->timeline);
+ remove_signaling_context(b, ce);
}
}
}
@@ -220,116 +253,89 @@ static void signal_irq_work(struct irq_work *work)
}
}
-static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+struct intel_breadcrumbs *
+intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
-
- lockdep_assert_held(&b->irq_lock);
- if (b->irq_armed)
- return true;
-
- if (!intel_gt_pm_get_if_awake(engine->gt))
- return false;
-
- /*
- * The breadcrumb irq will be disarmed on the interrupt after the
- * waiters are signaled. This gives us a single interrupt window in
- * which we can add a new waiter and avoid the cost of re-enabling
- * the irq.
- */
- WRITE_ONCE(b->irq_armed, true);
-
- /*
- * Since we are waiting on a request, the GPU should be busy
- * and should have its own rpm reference. This is tracked
- * by i915->gt.awake, we can forgo holding our own wakref
- * for the interrupt as before i915->gt.awake is released (when
- * the driver is idle) we disarm the breadcrumbs.
- */
-
- if (!b->irq_enabled++)
- irq_enable(engine);
+ struct intel_breadcrumbs *b;
- return true;
-}
-
-void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ b = kzalloc(sizeof(*b), GFP_KERNEL);
+ if (!b)
+ return NULL;
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
+
+ b->irq_engine = irq_engine;
+
+ return b;
}
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
+ if (!b->irq_engine)
+ return;
+
spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_enabled)
- irq_enable(engine);
+ irq_enable(b->irq_engine);
else
- irq_disable(engine);
+ irq_disable(b->irq_engine);
spin_unlock_irqrestore(&b->irq_lock, flags);
}
-void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
- struct intel_context *ce)
+void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
- spin_lock_irqsave(&b->irq_lock, flags);
- if (!list_empty(&ce->signals)) {
- struct i915_request *rq, *next;
-
- /* Queue for executing the signal callbacks in the irq_work */
- list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
- GEM_BUG_ON(rq->engine != engine);
- GEM_BUG_ON(!__request_completed(rq));
-
- __signal_request(rq, &b->signaled_requests);
- }
+ if (!READ_ONCE(b->irq_armed))
+ return;
- INIT_LIST_HEAD(&ce->signals);
- list_del_init(&ce->signal_link);
+ spin_lock_irqsave(&b->irq_lock, flags);
+ __intel_breadcrumbs_disarm_irq(b);
+ spin_unlock_irqrestore(&b->irq_lock, flags);
+ if (!list_empty(&b->signalers))
irq_work_queue(&b->irq_work);
- }
- spin_unlock_irqrestore(&b->irq_lock, flags);
}
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
+void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
+ kfree(b);
}
-bool i915_request_enable_breadcrumb(struct i915_request *rq)
+static void insert_breadcrumb(struct i915_request *rq,
+ struct intel_breadcrumbs *b)
{
- lockdep_assert_held(&rq->lock);
-
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
- return true;
+ struct intel_context *ce = rq->context;
+ struct list_head *pos;
- if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- struct intel_context *ce = rq->context;
- struct list_head *pos;
-
- spin_lock(&b->irq_lock);
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ return;
- if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
- goto unlock;
+ i915_request_get(rq);
- if (!__intel_breadcrumbs_arm_irq(b))
- goto unlock;
+ /*
+ * If the request is already completed, we can transfer it
+ * straight onto a signaled list, and queue the irq worker for
+ * its signal completion.
+ */
+ if (__request_completed(rq)) {
+ if (__signal_request(rq, &b->signaled_requests))
+ irq_work_queue(&b->irq_work);
+ return;
+ }
+ if (list_empty(&ce->signals)) {
+ add_signaling_context(b, ce);
+ pos = &ce->signals;
+ } else {
/*
* We keep the seqno in retirement order, so we can break
* inside intel_engine_signal_breadcrumbs as soon as we've
@@ -351,24 +357,75 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
break;
}
- list_add(&rq->signal_link, pos);
- if (pos == &ce->signals) /* catch transitions from empty list */
- list_move_tail(&ce->signal_link, &b->signalers);
- GEM_BUG_ON(!check_signal_order(ce, rq));
+ }
+ list_add(&rq->signal_link, pos);
+ GEM_BUG_ON(!check_signal_order(ce, rq));
+ set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ /* Check after attaching to irq, interrupt may have already fired. */
+ if (__request_completed(rq))
+ irq_work_queue(&b->irq_work);
+}
- set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-unlock:
+bool i915_request_enable_breadcrumb(struct i915_request *rq)
+{
+ struct intel_breadcrumbs *b;
+
+ /* Serialises with i915_request_retire() using rq->lock */
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
+
+ /*
+ * Peek at i915_request_submit()/i915_request_unsubmit() status.
+ *
+ * If the request is not yet active (and not signaled), we will
+ * attach the breadcrumb later.
+ */
+ if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ return true;
+
+ /*
+ * rq->engine is locked by rq->engine->active.lock. That however
+ * is not known until after rq->engine has been dereferenced and
+ * the lock acquired. Hence we acquire the lock and then validate
+ * that rq->engine still matches the lock we hold for it.
+ *
+ * Here, we are using the breadcrumb lock as a proxy for the
+ * rq->engine->active.lock, and we know that since the breadcrumb
+ * will be serialised within i915_request_submit/i915_request_unsubmit,
+ * the engine cannot change while active as long as we hold the
+ * breadcrumb lock on that engine.
+ *
+ * From the dma_fence_enable_signaling() path, we are outside of the
+ * request submit/unsubmit path, and so we must be more careful to
+ * acquire the right lock.
+ */
+ b = READ_ONCE(rq->engine)->breadcrumbs;
+ spin_lock(&b->irq_lock);
+ while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
spin_unlock(&b->irq_lock);
+ b = READ_ONCE(rq->engine)->breadcrumbs;
+ spin_lock(&b->irq_lock);
}
- return !__request_completed(rq);
+ /*
+ * Now that we are finally serialised with request submit/unsubmit,
+ * [with b->irq_lock] and with i915_request_retire() [via checking
+ * SIGNALED with rq->lock] confirm the request is indeed active. If
+ * it is no longer active, the breadcrumb will be attached upon
+ * i915_request_submit().
+ */
+ if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ insert_breadcrumb(rq, b);
+
+ spin_unlock(&b->irq_lock);
+
+ return true;
}
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
- lockdep_assert_held(&rq->lock);
+ struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
/*
* We must wait for b->irq_lock so that we know the interrupt handler
@@ -382,23 +439,19 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
- list_del_init(&ce->signal_link);
+ remove_signaling_context(b, ce);
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ i915_request_put(rq);
}
spin_unlock(&b->irq_lock);
}
-void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
- struct drm_printer *p)
+static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_context *ce;
struct i915_request *rq;
- if (list_empty(&b->signalers))
- return;
-
drm_printf(p, "Signals:\n");
spin_lock_irq(&b->irq_lock);
@@ -414,3 +467,17 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
}
spin_unlock_irq(&b->irq_lock);
}
+
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+ struct drm_printer *p)
+{
+ struct intel_breadcrumbs *b;
+
+ b = engine->breadcrumbs;
+ if (!b)
+ return;
+
+ drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
+ if (!list_empty(&b->signalers))
+ print_signals(b, p);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
new file mode 100644
index 000000000000..ed3d1deabfbd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BREADCRUMBS__
+#define __INTEL_BREADCRUMBS__
+
+#include <linux/irq_work.h>
+
+#include "intel_engine_types.h"
+
+struct drm_printer;
+struct i915_request;
+struct intel_breadcrumbs;
+
+struct intel_breadcrumbs *
+intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
+void intel_breadcrumbs_free(struct intel_breadcrumbs *b);
+
+void intel_breadcrumbs_reset(struct intel_breadcrumbs *b);
+void intel_breadcrumbs_park(struct intel_breadcrumbs *b);
+
+static inline void
+intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
+{
+ irq_work_queue(&engine->breadcrumbs->irq_work);
+}
+
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+ struct drm_printer *p);
+
+bool i915_request_enable_breadcrumb(struct i915_request *request);
+void i915_request_cancel_breadcrumb(struct i915_request *request);
+
+#endif /* __INTEL_BREADCRUMBS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
new file mode 100644
index 000000000000..8e53b9942695
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BREADCRUMBS_TYPES__
+#define __INTEL_BREADCRUMBS_TYPES__
+
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/*
+ * Rather than have every client wait upon all user interrupts,
+ * with the herd waking after every interrupt and each doing the
+ * heavyweight seqno dance, we delegate the task (of being the
+ * bottom-half of the user interrupt) to the first client. After
+ * every interrupt, we wake up one client, who does the heavyweight
+ * coherent seqno read and either goes back to sleep (if incomplete),
+ * or wakes up all the completed clients in parallel, before then
+ * transferring the bottom-half status to the next client in the queue.
+ *
+ * Compared to walking the entire list of waiters in a single dedicated
+ * bottom-half, we reduce the latency of the first waiter by avoiding
+ * a context switch, but incur additional coherent seqno reads when
+ * following the chain of request breadcrumbs. Since it is most likely
+ * that we have a single client waiting on each seqno, then reducing
+ * the overhead of waking that client is much preferred.
+ */
+struct intel_breadcrumbs {
+ spinlock_t irq_lock; /* protects the lists used in hardirq context */
+
+ /* Not all breadcrumbs are attached to physical HW */
+ struct intel_engine_cs *irq_engine;
+
+ struct list_head signalers;
+ struct list_head signaled_requests;
+
+ struct irq_work irq_work; /* for use from inside irq_lock */
+
+ unsigned int irq_enabled;
+
+ bool irq_armed;
+};
+
+#endif /* __INTEL_BREADCRUMBS_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 52db2bde44a3..92a3f25c4006 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,57 +93,210 @@ static void intel_context_active_release(struct intel_context *ce)
i915_active_release(&ce->active);
}
-int __intel_context_do_pin(struct intel_context *ce)
+static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
+{
+ unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
+ int err;
+
+ err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
+ if (err)
+ return err;
+
+ err = i915_active_acquire(&vma->active);
+ if (err)
+ goto err_unpin;
+
+ /*
+ * And mark it as a globally pinned object to let the shrinker know
+ * it cannot reclaim the object until we release it.
+ */
+ i915_vma_make_unshrinkable(vma);
+ vma->obj->mm.dirty = true;
+
+ return 0;
+
+err_unpin:
+ i915_vma_unpin(vma);
+ return err;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+ i915_vma_make_shrinkable(vma);
+ i915_active_release(&vma->active);
+ __i915_vma_unpin(vma);
+}
+
+static int __ring_active(struct intel_ring *ring,
+ struct i915_gem_ww_ctx *ww)
+{
+ int err;
+
+ err = intel_ring_pin(ring, ww);
+ if (err)
+ return err;
+
+ err = i915_active_acquire(&ring->vma->active);
+ if (err)
+ goto err_pin;
+
+ return 0;
+
+err_pin:
+ intel_ring_unpin(ring);
+ return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+ i915_active_release(&ring->vma->active);
+ intel_ring_unpin(ring);
+}
+
+static int intel_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
{
int err;
+ CE_TRACE(ce, "active\n");
+
+ err = __ring_active(ce->ring, ww);
+ if (err)
+ return err;
+
+ err = intel_timeline_pin(ce->timeline, ww);
+ if (err)
+ goto err_ring;
+
+ if (!ce->state)
+ return 0;
+
+ err = __context_pin_state(ce->state, ww);
+ if (err)
+ goto err_timeline;
+
+
+ return 0;
+
+err_timeline:
+ intel_timeline_unpin(ce->timeline);
+err_ring:
+ __ring_retire(ce->ring);
+ return err;
+}
+
+static void intel_context_post_unpin(struct intel_context *ce)
+{
+ if (ce->state)
+ __context_unpin_state(ce->state);
+
+ intel_timeline_unpin(ce->timeline);
+ __ring_retire(ce->ring);
+}
+
+int __intel_context_do_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
+{
+ bool handoff = false;
+ void *vaddr;
+ int err = 0;
+
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
err = intel_context_alloc_state(ce);
if (err)
return err;
}
- err = i915_active_acquire(&ce->active);
+ /*
+ * We always pin the context/ring/timeline here, to ensure a pin
+ * refcount for __intel_context_active(), which prevent a lock
+ * inversion of ce->pin_mutex vs dma_resv_lock().
+ */
+
+ err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
+ if (!err && ce->ring->vma->obj)
+ err = i915_gem_object_lock(ce->ring->vma->obj, ww);
+ if (!err && ce->state)
+ err = i915_gem_object_lock(ce->state->obj, ww);
+ if (!err)
+ err = intel_context_pre_pin(ce, ww);
if (err)
return err;
- if (mutex_lock_interruptible(&ce->pin_mutex)) {
- err = -EINTR;
- goto out_release;
- }
+ err = i915_active_acquire(&ce->active);
+ if (err)
+ goto err_ctx_unpin;
+
+ err = ce->ops->pre_pin(ce, ww, &vaddr);
+ if (err)
+ goto err_release;
+
+ err = mutex_lock_interruptible(&ce->pin_mutex);
+ if (err)
+ goto err_post_unpin;
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
- goto out_unlock;
+ goto err_unlock;
}
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
- goto out_unlock;
+ goto err_unlock;
- err = ce->ops->pin(ce);
- if (unlikely(err))
- goto err_active;
+ err = ce->ops->pin(ce, vaddr);
+ if (err) {
+ intel_context_active_release(ce);
+ goto err_unlock;
+ }
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
i915_ggtt_offset(ce->ring->vma),
ce->ring->head, ce->ring->tail);
+ handoff = true;
smp_mb__before_atomic(); /* flush pin before it is visible */
atomic_inc(&ce->pin_count);
}
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
- GEM_BUG_ON(i915_active_is_idle(&ce->active));
- goto out_unlock;
-err_active:
- intel_context_active_release(ce);
-out_unlock:
+err_unlock:
mutex_unlock(&ce->pin_mutex);
-out_release:
+err_post_unpin:
+ if (!handoff)
+ ce->ops->post_unpin(ce);
+err_release:
i915_active_release(&ce->active);
+err_ctx_unpin:
+ intel_context_post_unpin(ce);
+
+ /*
+ * Unlock the hwsp_ggtt object since it's shared.
+ * In principle we can unlock all the global state locked above
+ * since it's pinned and doesn't need fencing, and will
+ * thus remain resident until it is explicitly unpinned.
+ */
+ i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);
+
+ return err;
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = __intel_context_do_pin_ww(ce, &ww);
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
return err;
}
@@ -154,6 +307,7 @@ void intel_context_unpin(struct intel_context *ce)
CE_TRACE(ce, "unpin\n");
ce->ops->unpin(ce);
+ ce->ops->post_unpin(ce);
/*
* Once released, we may asynchronously drop the active reference.
@@ -166,65 +320,6 @@ void intel_context_unpin(struct intel_context *ce)
intel_context_put(ce);
}
-static int __context_pin_state(struct i915_vma *vma)
-{
- unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
- int err;
-
- err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
- if (err)
- return err;
-
- err = i915_active_acquire(&vma->active);
- if (err)
- goto err_unpin;
-
- /*
- * And mark it as a globally pinned object to let the shrinker know
- * it cannot reclaim the object until we release it.
- */
- i915_vma_make_unshrinkable(vma);
- vma->obj->mm.dirty = true;
-
- return 0;
-
-err_unpin:
- i915_vma_unpin(vma);
- return err;
-}
-
-static void __context_unpin_state(struct i915_vma *vma)
-{
- i915_vma_make_shrinkable(vma);
- i915_active_release(&vma->active);
- __i915_vma_unpin(vma);
-}
-
-static int __ring_active(struct intel_ring *ring)
-{
- int err;
-
- err = intel_ring_pin(ring);
- if (err)
- return err;
-
- err = i915_active_acquire(&ring->vma->active);
- if (err)
- goto err_pin;
-
- return 0;
-
-err_pin:
- intel_ring_unpin(ring);
- return err;
-}
-
-static void __ring_retire(struct intel_ring *ring)
-{
- i915_active_release(&ring->vma->active);
- intel_ring_unpin(ring);
-}
-
__i915_active_call
static void __intel_context_retire(struct i915_active *active)
{
@@ -235,48 +330,29 @@ static void __intel_context_retire(struct i915_active *active)
intel_context_get_avg_runtime_ns(ce));
set_bit(CONTEXT_VALID_BIT, &ce->flags);
- if (ce->state)
- __context_unpin_state(ce->state);
-
- intel_timeline_unpin(ce->timeline);
- __ring_retire(ce->ring);
-
+ intel_context_post_unpin(ce);
intel_context_put(ce);
}
static int __intel_context_active(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
- int err;
-
- CE_TRACE(ce, "active\n");
intel_context_get(ce);
- err = __ring_active(ce->ring);
- if (err)
- goto err_put;
+ /* everything should already be activated by intel_context_pre_pin() */
+ GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
+ __intel_ring_pin(ce->ring);
- err = intel_timeline_pin(ce->timeline);
- if (err)
- goto err_ring;
+ __intel_timeline_pin(ce->timeline);
- if (!ce->state)
- return 0;
-
- err = __context_pin_state(ce->state);
- if (err)
- goto err_timeline;
+ if (ce->state) {
+ GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
+ __i915_vma_pin(ce->state);
+ i915_vma_make_unshrinkable(ce->state);
+ }
return 0;
-
-err_timeline:
- intel_timeline_unpin(ce->timeline);
-err_ring:
- __ring_retire(ce->ring);
-err_put:
- intel_context_put(ce);
- return err;
}
void
@@ -382,15 +458,38 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err;
- err = intel_context_pin(ce);
- if (unlikely(err))
- return ERR_PTR(err);
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = intel_context_pin_ww(ce, &ww);
+ if (!err) {
+ rq = i915_request_create(ce);
+ intel_context_unpin(ce);
+ } else if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ rq = ERR_PTR(err);
+ } else {
+ rq = ERR_PTR(err);
+ }
+
+ i915_gem_ww_ctx_fini(&ww);
- rq = i915_request_create(ce);
- intel_context_unpin(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ /*
+ * timeline->mutex should be the inner lock, but is used as outer lock.
+ * Hack around this to shut up lockdep in selftests..
+ */
+ lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
+ mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
+ mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+ rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
return rq;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 07be021882cc..fda2eba81e22 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -25,6 +25,8 @@
##__VA_ARGS__); \
} while (0)
+struct i915_gem_ww_ctx;
+
void intel_context_init(struct intel_context *ce,
struct intel_engine_cs *engine);
void intel_context_fini(struct intel_context *ce);
@@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
}
int __intel_context_do_pin(struct intel_context *ce);
+int __intel_context_do_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww);
static inline bool intel_context_pin_if_active(struct intel_context *ce)
{
@@ -95,6 +99,15 @@ static inline int intel_context_pin(struct intel_context *ce)
return __intel_context_do_pin(ce);
}
+static inline int intel_context_pin_ww(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww)
+{
+ if (likely(intel_context_pin_if_active(ce)))
+ return 0;
+
+ return __intel_context_do_pin_ww(ce, ww);
+}
+
static inline void __intel_context_pin(struct intel_context *ce)
{
GEM_BUG_ON(!intel_context_is_pinned(ce));
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4954b0df4864..552cb57a2e8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -23,6 +23,7 @@
DECLARE_EWMA(runtime, 3, 8);
struct i915_gem_context;
+struct i915_gem_ww_ctx;
struct i915_vma;
struct intel_context;
struct intel_ring;
@@ -30,8 +31,10 @@ struct intel_ring;
struct intel_context_ops {
int (*alloc)(struct intel_context *ce);
- int (*pin)(struct intel_context *ce);
+ int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
+ int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
+ void (*post_unpin)(struct intel_context *ce);
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index a9249a23903a..7c3a1012e702 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -223,26 +223,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
void intel_engine_init_execlists(struct intel_engine_cs *engine);
-void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void
-intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
- irq_work_queue(&engine->breadcrumbs.irq_work);
-}
-
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-
-void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
- struct intel_context *ce);
-
-void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
- struct drm_printer *p);
-
static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
@@ -357,4 +337,13 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
+static inline bool
+intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ return false;
+
+ return READ_ONCE(engine->props.heartbeat_interval_ms);
+}
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 26087dd79782..5bfb5f7ed02c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,6 +28,7 @@
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
@@ -634,7 +635,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
else
flags = PIN_HIGH;
- return i915_ggtt_pin(vma, 0, flags);
+ return i915_ggtt_pin(vma, NULL, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
@@ -700,8 +701,13 @@ static int engine_setup_common(struct intel_engine_cs *engine)
if (err)
return err;
+ engine->breadcrumbs = intel_breadcrumbs_create(engine);
+ if (!engine->breadcrumbs) {
+ err = -ENOMEM;
+ goto err_status;
+ }
+
intel_engine_init_active(engine, ENGINE_PHYSICAL);
- intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
@@ -716,6 +722,10 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_ctx_wa(engine);
return 0;
+
+err_status:
+ cleanup_status_page(engine);
+ return err;
}
struct measure_breadcrumb {
@@ -785,9 +795,11 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
}
static struct intel_context *
-create_kernel_context(struct intel_engine_cs *engine)
+create_pinned_context(struct intel_engine_cs *engine,
+ unsigned int hwsp,
+ struct lock_class_key *key,
+ const char *name)
{
- static struct lock_class_key kernel;
struct intel_context *ce;
int err;
@@ -796,6 +808,7 @@ create_kernel_context(struct intel_engine_cs *engine)
return ce;
__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+ ce->timeline = page_pack_bits(NULL, hwsp);
err = intel_context_pin(ce); /* perma-pin so it is always available */
if (err) {
@@ -809,11 +822,20 @@ create_kernel_context(struct intel_engine_cs *engine)
* should we need to inject GPU operations during their request
* construction.
*/
- lockdep_set_class(&ce->timeline->mutex, &kernel);
+ lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
return ce;
}
+static struct intel_context *
+create_kernel_context(struct intel_engine_cs *engine)
+{
+ static struct lock_class_key kernel;
+
+ return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
+ &kernel, "kernel_context");
+}
+
/**
* intel_engines_init_common - initialize cengine state which might require hw access
* @engine: Engine to initialize.
@@ -902,9 +924,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
+ intel_breadcrumbs_free(engine->breadcrumbs);
intel_engine_fini_retire(engine);
- intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
if (engine->default_state)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 8ffdf676c0a0..5067d0524d4b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -177,36 +177,82 @@ void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
}
+static int __intel_engine_pulse(struct intel_engine_cs *engine)
+{
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+
+ lockdep_assert_held(&ce->timeline->mutex);
+ GEM_BUG_ON(!intel_engine_has_preemption(engine));
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+ idle_pulse(engine, rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+ GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+
+ return 0;
+}
+
+static unsigned long set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ unsigned long old;
+
+ old = xchg(&engine->props.heartbeat_interval_ms, delay);
+ if (delay)
+ intel_engine_unpark_heartbeat(engine);
+ else
+ intel_engine_park_heartbeat(engine);
+
+ return old;
+}
+
int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
unsigned long delay)
{
- int err;
+ struct intel_context *ce = engine->kernel_context;
+ int err = 0;
- /* Send one last pulse before to cleanup persistent hogs */
- if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
- err = intel_engine_pulse(engine);
- if (err)
- return err;
- }
+ if (!delay && !intel_engine_has_preempt_reset(engine))
+ return -ENODEV;
+
+ intel_engine_pm_get(engine);
+
+ err = mutex_lock_interruptible(&ce->timeline->mutex);
+ if (err)
+ goto out_rpm;
- WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+ if (delay != engine->props.heartbeat_interval_ms) {
+ unsigned long saved = set_heartbeat(engine, delay);
- if (intel_engine_pm_get_if_awake(engine)) {
- if (delay)
- intel_engine_unpark_heartbeat(engine);
- else
- intel_engine_park_heartbeat(engine);
- intel_engine_pm_put(engine);
+ /* recheck current execution */
+ if (intel_engine_has_preemption(engine)) {
+ err = __intel_engine_pulse(engine);
+ if (err)
+ set_heartbeat(engine, saved);
+ }
}
- return 0;
+ mutex_unlock(&ce->timeline->mutex);
+
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
}
int intel_engine_pulse(struct intel_engine_cs *engine)
{
- struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
struct intel_context *ce = engine->kernel_context;
- struct i915_request *rq;
int err;
if (!intel_engine_has_preemption(engine))
@@ -215,30 +261,12 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
if (!intel_engine_pm_get_if_awake(engine))
return 0;
- if (mutex_lock_interruptible(&ce->timeline->mutex)) {
- err = -EINTR;
- goto out_rpm;
- }
-
- intel_context_enter(ce);
- rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
- intel_context_exit(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_unlock;
+ err = -EINTR;
+ if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
+ err = __intel_engine_pulse(engine);
+ mutex_unlock(&ce->timeline->mutex);
}
- __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
- idle_pulse(engine, rq);
-
- __i915_request_commit(rq);
- __i915_request_queue(rq, &attr);
- GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
- err = 0;
-
-out_unlock:
- mutex_unlock(&ce->timeline->mutex);
-out_rpm:
intel_engine_pm_put(engine);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8ec3eecf3e39..f7b2e07e2229 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -6,6 +6,7 @@
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
@@ -247,7 +248,7 @@ static int __engine_park(struct intel_wakeref *wf)
call_idle_barriers(engine); /* cleanup after wedging */
intel_engine_park_heartbeat(engine);
- intel_engine_disarm_breadcrumbs(engine);
+ intel_breadcrumbs_park(engine->breadcrumbs);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8de92fd7d392..c400aaa2287b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,6 +22,7 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
+#include "intel_breadcrumbs_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
@@ -373,34 +374,8 @@ struct intel_engine_cs {
*/
struct ewma__engine_latency latency;
- /* Rather than have every client wait upon all user interrupts,
- * with the herd waking after every interrupt and each doing the
- * heavyweight seqno dance, we delegate the task (of being the
- * bottom-half of the user interrupt) to the first client. After
- * every interrupt, we wake up one client, who does the heavyweight
- * coherent seqno read and either goes back to sleep (if incomplete),
- * or wakes up all the completed clients in parallel, before then
- * transferring the bottom-half status to the next client in the queue.
- *
- * Compared to walking the entire list of waiters in a single dedicated
- * bottom-half, we reduce the latency of the first waiter by avoiding
- * a context switch, but incur additional coherent seqno reads when
- * following the chain of request breadcrumbs. Since it is most likely
- * that we have a single client waiting on each seqno, then reducing
- * the overhead of waking that client is much preferred.
- */
- struct intel_breadcrumbs {
- spinlock_t irq_lock;
- struct list_head signalers;
-
- struct list_head signaled_requests;
-
- struct irq_work irq_work; /* for use from inside irq_lock */
-
- unsigned int irq_enabled;
-
- bool irq_armed;
- } breadcrumbs;
+ /* Keep track of all the seqno used, a trail of breadcrumbs */
+ struct intel_breadcrumbs *breadcrumbs;
struct intel_engine_pmu {
/**
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 99e28d9021e8..81c05f551b9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -78,8 +78,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
int ret;
- stash_init(&i915->mm.wc_stash);
-
/*
* Note that we use page colouring to enforce a guard page at the
* end of the address space. This is required as the CS may prefetch
@@ -232,7 +230,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
/* Fill the allocated but "unused" space beyond the end of the buffer */
while (gte < end)
- gen8_set_pte(gte++, vm->scratch[0].encode);
+ gen8_set_pte(gte++, vm->scratch[0]->encode);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -283,7 +281,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
/* Fill the allocated but "unused" space beyond the end of the buffer */
while (gte < end)
- iowrite32(vm->scratch[0].encode, gte++);
+ iowrite32(vm->scratch[0]->encode, gte++);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -303,7 +301,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
- const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
gen8_pte_t __iomem *gtt_base =
(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
@@ -401,7 +399,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = vm->scratch[0].encode;
+ scratch_pte = vm->scratch[0]->encode;
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, &gtt_base[i]);
}
@@ -436,16 +434,17 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}
-static int ggtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void ggtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
- return 0;
+ return;
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
pte_flags = 0;
@@ -454,8 +453,6 @@ static int ggtt_bind_vma(struct i915_address_space *vm,
vm->insert_entries(vm, vma, cache_level, pte_flags);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-
- return 0;
}
static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@@ -568,31 +565,25 @@ err:
return ret;
}
-static int aliasing_gtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
u32 pte_flags;
- int ret;
/* Currently applicable only to VLV */
pte_flags = 0;
if (i915_gem_object_is_readonly(vma->obj))
pte_flags |= PTE_READ_ONLY;
- if (flags & I915_VMA_LOCAL_BIND) {
- struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias;
-
- ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags);
- if (ret)
- return ret;
- }
+ if (flags & I915_VMA_LOCAL_BIND)
+ ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
+ stash, vma, cache_level, flags);
if (flags & I915_VMA_GLOBAL_BIND)
vm->insert_entries(vm, vma, cache_level, pte_flags);
-
- return 0;
}
static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
@@ -607,6 +598,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
+ struct i915_vm_pt_stash stash = {};
struct i915_ppgtt *ppgtt;
int err;
@@ -619,15 +611,21 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
goto err_ppgtt;
}
+ err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
+ if (err)
+ goto err_ppgtt;
+
+ err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
+ if (err)
+ goto err_stash;
+
/*
* Note we only pre-allocate as far as the end of the global
* GTT. On 48b / 4-level page-tables, the difference is very,
* very significant! We have to preallocate as GVT/vgpu does
* not like the page directory disappearing.
*/
- err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
- if (err)
- goto err_ppgtt;
+ ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
ggtt->alias = ppgtt;
ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
@@ -638,8 +636,11 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
return 0;
+err_stash:
+ i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
i915_vm_put(&ppgtt->vm);
return err;
@@ -715,18 +716,11 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
struct i915_ggtt *ggtt = &i915->ggtt;
- struct pagevec *pvec;
fini_aliasing_ppgtt(ggtt);
intel_ggtt_fini_fences(ggtt);
ggtt_cleanup_hw(ggtt);
-
- pvec = &i915->mm.wc_stash.pvec;
- if (pvec->nr) {
- set_pages_array_wb(pvec->pages, pvec->nr);
- __pagevec_release(pvec);
- }
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -789,7 +783,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return -ENOMEM;
}
- ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+ ret = setup_scratch_page(&ggtt->vm);
if (ret) {
drm_err(&i915->drm, "Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
@@ -797,8 +791,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return ret;
}
- ggtt->vm.scratch[0].encode =
- ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+ ggtt->vm.scratch[0]->encode =
+ ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
I915_CACHE_NONE, 0);
return 0;
@@ -824,7 +818,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
iounmap(ggtt->gsm);
- cleanup_scratch_page(vm);
+ free_scratch(vm);
}
static struct resource pci_resource(struct pci_dev *pdev, int bar)
@@ -852,6 +846,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
else
size = gen8_get_total_gtt_size(snb_gmch_ctl);
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->vm.insert_page = gen8_ggtt_insert_page;
@@ -1000,6 +996,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
size = gen6_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
ggtt->vm.clear_range = gen6_ggtt_clear_range;
@@ -1050,6 +1048,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->gmadr =
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+
ggtt->do_idle_maps = needs_idle_maps(i915);
ggtt->vm.insert_page = i915_ggtt_insert_page;
ggtt->vm.insert_entries = i915_ggtt_insert_entries;
@@ -1165,11 +1165,6 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
ggtt->invalidate(ggtt);
}
-static unsigned int clear_bind(struct i915_vma *vma)
-{
- return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags);
-}
-
void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
struct i915_vma *vma;
@@ -1187,11 +1182,13 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
- unsigned int was_bound = clear_bind(vma);
+ unsigned int was_bound =
+ atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
- WARN_ON(i915_vma_bind(vma,
- obj ? obj->cache_level : 0,
- was_bound, NULL));
+ GEM_BUG_ON(!was_bound);
+ vma->ops->bind_vma(&ggtt->vm, NULL, vma,
+ obj ? obj->cache_level : 0,
+ was_bound);
if (obj) { /* only used during resume => exclusive access */
flush |= fetch_and_zero(&obj->write_domain);
obj->read_domains |= I915_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index e0755f1a904b..39b428c5049c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -356,7 +356,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
+ ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (ret)
goto err_unref;
@@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
/* We must be able to switch to something! */
GEM_BUG_ON(!engine->kernel_context);
- err = intel_renderstate_init(&so, engine);
- if (err)
- goto out;
-
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
}
- rq = intel_context_create_request(ce);
+ err = intel_renderstate_init(&so, ce);
+ if (err)
+ goto err;
+
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- intel_context_put(ce);
- goto out;
+ goto err_fini;
}
err = intel_engine_emit_ctx_wa(rq);
@@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
err_rq:
requests[id] = i915_request_get(rq);
i915_request_add(rq);
- intel_renderstate_fini(&so);
- if (err)
+err_fini:
+ intel_renderstate_fini(&so, ce);
+err:
+ if (err) {
+ intel_context_put(ce);
goto out;
+ }
}
/* Flush the default context image to memory, and enable powersaving. */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 418ae184cecf..104cb30e8c13 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -35,39 +35,65 @@ static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
- kfree(node);
+ kfree_rcu(node, rcu);
}
-static void pool_free_work(struct work_struct *wrk)
+static bool pool_free_older_than(struct intel_gt_buffer_pool *pool, long keep)
{
- struct intel_gt_buffer_pool *pool =
- container_of(wrk, typeof(*pool), work.work);
- struct intel_gt_buffer_pool_node *node, *next;
- unsigned long old = jiffies - HZ;
+ struct intel_gt_buffer_pool_node *node, *stale = NULL;
bool active = false;
- LIST_HEAD(stale);
int n;
/* Free buffers that have not been used in the past second */
- spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
struct list_head *list = &pool->cache_list[n];
- /* Most recent at head; oldest at tail */
- list_for_each_entry_safe_reverse(node, next, list, link) {
- if (time_before(node->age, old))
- break;
+ if (list_empty(list))
+ continue;
+
+ if (spin_trylock_irq(&pool->lock)) {
+ struct list_head *pos;
+
+ /* Most recent at head; oldest at tail */
+ list_for_each_prev(pos, list) {
+ unsigned long age;
+
+ node = list_entry(pos, typeof(*node), link);
+
+ age = READ_ONCE(node->age);
+ if (!age || jiffies - age < keep)
+ break;
+
+ /* Check we are the first to claim this node */
+ if (!xchg(&node->age, 0))
+ break;
- list_move(&node->link, &stale);
+ node->free = stale;
+ stale = node;
+ }
+ if (!list_is_last(pos, list))
+ __list_del_many(pos, list);
+
+ spin_unlock_irq(&pool->lock);
}
+
active |= !list_empty(list);
}
- spin_unlock_irq(&pool->lock);
- list_for_each_entry_safe(node, next, &stale, link)
+ while ((node = stale)) {
+ stale = stale->free;
node_free(node);
+ }
+
+ return active;
+}
+
+static void pool_free_work(struct work_struct *wrk)
+{
+ struct intel_gt_buffer_pool *pool =
+ container_of(wrk, typeof(*pool), work.work);
- if (active)
+ if (pool_free_older_than(pool, HZ))
schedule_delayed_work(&pool->work,
round_jiffies_up_relative(HZ));
}
@@ -108,9 +134,10 @@ static void pool_retire(struct i915_active *ref)
/* Return this object to the shrinker pool */
i915_gem_object_make_purgeable(node->obj);
+ GEM_BUG_ON(node->age);
spin_lock_irqsave(&pool->lock, flags);
- node->age = jiffies;
- list_add(&node->link, list);
+ list_add_rcu(&node->link, list);
+ WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
spin_unlock_irqrestore(&pool->lock, flags);
schedule_delayed_work(&pool->work,
@@ -129,6 +156,7 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz)
if (!node)
return ERR_PTR(-ENOMEM);
+ node->age = 0;
node->pool = pool;
i915_active_init(&node->active, pool_active, pool_retire);
@@ -151,20 +179,30 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
struct intel_gt_buffer_pool_node *node;
struct list_head *list;
- unsigned long flags;
int ret;
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
- spin_lock_irqsave(&pool->lock, flags);
- list_for_each_entry(node, list, link) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, list, link) {
+ unsigned long age;
+
if (node->obj->base.size < size)
continue;
- list_del(&node->link);
- break;
+
+ age = READ_ONCE(node->age);
+ if (!age)
+ continue;
+
+ if (cmpxchg(&node->age, age, 0) == age) {
+ spin_lock_irq(&pool->lock);
+ list_del_rcu(&node->link);
+ spin_unlock_irq(&pool->lock);
+ break;
+ }
}
- spin_unlock_irqrestore(&pool->lock, flags);
+ rcu_read_unlock();
if (&node->link == list) {
node = node_create(pool, size);
@@ -192,28 +230,13 @@ void intel_gt_init_buffer_pool(struct intel_gt *gt)
INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
-static void pool_free_imm(struct intel_gt_buffer_pool *pool)
-{
- int n;
-
- spin_lock_irq(&pool->lock);
- for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
- struct intel_gt_buffer_pool_node *node, *next;
- struct list_head *list = &pool->cache_list[n];
-
- list_for_each_entry_safe(node, next, list, link)
- node_free(node);
- INIT_LIST_HEAD(list);
- }
- spin_unlock_irq(&pool->lock);
-}
-
void intel_gt_flush_buffer_pool(struct intel_gt *gt)
{
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
do {
- pool_free_imm(pool);
+ while (pool_free_older_than(pool, 0))
+ ;
} while (cancel_delayed_work_sync(&pool->work));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e28bdda771ed..bcf1658c9633 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
- struct intel_gt_buffer_pool *pool;
+ union {
+ struct intel_gt_buffer_pool *pool;
+ struct intel_gt_buffer_pool_node *free;
+ struct rcu_head rcu;
+ };
unsigned long age;
};
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index b05da68e52f4..257063a57101 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -8,6 +8,7 @@
#include "i915_drv.h"
#include "i915_irq.h"
+#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_uncore.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2a72cce63fd9..3f1114b58b01 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -11,160 +11,24 @@
#include "intel_gt.h"
#include "intel_gtt.h"
-void stash_init(struct pagestash *stash)
+struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
- pagevec_init(&stash->pvec);
- spin_lock_init(&stash->lock);
-}
-
-static struct page *stash_pop_page(struct pagestash *stash)
-{
- struct page *page = NULL;
-
- spin_lock(&stash->lock);
- if (likely(stash->pvec.nr))
- page = stash->pvec.pages[--stash->pvec.nr];
- spin_unlock(&stash->lock);
-
- return page;
-}
-
-static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
-{
- unsigned int nr;
-
- spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
-
- nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
- memcpy(stash->pvec.pages + stash->pvec.nr,
- pvec->pages + pvec->nr - nr,
- sizeof(pvec->pages[0]) * nr);
- stash->pvec.nr += nr;
-
- spin_unlock(&stash->lock);
-
- pvec->nr -= nr;
-}
-
-static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
-{
- struct pagevec stack;
- struct page *page;
-
if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
i915_gem_shrink_all(vm->i915);
- page = stash_pop_page(&vm->free_pages);
- if (page)
- return page;
-
- if (!vm->pt_kmap_wc)
- return alloc_page(gfp);
-
- /* Look in our global stash of WC pages... */
- page = stash_pop_page(&vm->i915->mm.wc_stash);
- if (page)
- return page;
-
- /*
- * Otherwise batch allocate pages to amortize cost of set_pages_wc.
- *
- * We have to be careful as page allocation may trigger the shrinker
- * (via direct reclaim) which will fill up the WC stash underneath us.
- * So we add our WB pages into a temporary pvec on the stack and merge
- * them into the WC stash after all the allocations are complete.
- */
- pagevec_init(&stack);
- do {
- struct page *page;
-
- page = alloc_page(gfp);
- if (unlikely(!page))
- break;
-
- stack.pages[stack.nr++] = page;
- } while (pagevec_space(&stack));
-
- if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
- page = stack.pages[--stack.nr];
-
- /* Merge spare WC pages to the global stash */
- if (stack.nr)
- stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
-
- /* Push any surplus WC pages onto the local VM stash */
- if (stack.nr)
- stash_push_pagevec(&vm->free_pages, &stack);
- }
-
- /* Return unwanted leftovers */
- if (unlikely(stack.nr)) {
- WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
- __pagevec_release(&stack);
- }
-
- return page;
+ return i915_gem_object_create_internal(vm->i915, sz);
}
-static void vm_free_pages_release(struct i915_address_space *vm,
- bool immediate)
+int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
- struct pagevec *pvec = &vm->free_pages.pvec;
- struct pagevec stack;
-
- lockdep_assert_held(&vm->free_pages.lock);
- GEM_BUG_ON(!pagevec_count(pvec));
-
- if (vm->pt_kmap_wc) {
- /*
- * When we use WC, first fill up the global stash and then
- * only if full immediately free the overflow.
- */
- stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
-
- /*
- * As we have made some room in the VM's free_pages,
- * we can wait for it to fill again. Unless we are
- * inside i915_address_space_fini() and must
- * immediately release the pages!
- */
- if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
- return;
+ int err;
- /*
- * We have to drop the lock to allow ourselves to sleep,
- * so take a copy of the pvec and clear the stash for
- * others to use it as we sleep.
- */
- stack = *pvec;
- pagevec_reinit(pvec);
- spin_unlock(&vm->free_pages.lock);
-
- pvec = &stack;
- set_pages_array_wb(pvec->pages, pvec->nr);
-
- spin_lock(&vm->free_pages.lock);
- }
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ return err;
- __pagevec_release(pvec);
-}
-
-static void vm_free_page(struct i915_address_space *vm, struct page *page)
-{
- /*
- * On !llc, we need to change the pages back to WB. We only do so
- * in bulk, so we rarely need to change the page attributes here,
- * but doing so requires a stop_machine() from deep inside arch/x86/mm.
- * To make detection of the possible sleep more likely, use an
- * unconditional might_sleep() for everybody.
- */
- might_sleep();
- spin_lock(&vm->free_pages.lock);
- while (!pagevec_space(&vm->free_pages.pvec))
- vm_free_pages_release(vm, false);
- GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
- pagevec_add(&vm->free_pages.pvec, page);
- spin_unlock(&vm->free_pages.lock);
+ i915_gem_object_make_unshrinkable(obj);
+ return 0;
}
void __i915_vm_close(struct i915_address_space *vm)
@@ -194,14 +58,7 @@ void __i915_vm_close(struct i915_address_space *vm)
void i915_address_space_fini(struct i915_address_space *vm)
{
- spin_lock(&vm->free_pages.lock);
- if (pagevec_count(&vm->free_pages.pvec))
- vm_free_pages_release(vm, true);
- GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
- spin_unlock(&vm->free_pages.lock);
-
drm_mm_takedown(&vm->mm);
-
mutex_destroy(&vm->mutex);
}
@@ -246,8 +103,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
drm_mm_init(&vm->mm, 0, vm->total);
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
- stash_init(&vm->free_pages);
-
INIT_LIST_HEAD(&vm->bound_list);
}
@@ -264,64 +119,50 @@ void clear_pages(struct i915_vma *vma)
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}
-static int __setup_page_dma(struct i915_address_space *vm,
- struct i915_page_dma *p,
- gfp_t gfp)
-{
- p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
- if (unlikely(!p->page))
- return -ENOMEM;
-
- p->daddr = dma_map_page_attrs(vm->dma,
- p->page, 0, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_NO_WARN);
- if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
- vm_free_page(vm, p->page);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
- return __setup_page_dma(vm, p, __GFP_HIGHMEM);
+ GEM_BUG_ON(!i915_gem_object_has_pages(p));
+ return sg_dma_address(p->mm.pages->sgl);
}
-void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+struct page *__px_page(struct drm_i915_gem_object *p)
{
- dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- vm_free_page(vm, p->page);
+ GEM_BUG_ON(!i915_gem_object_has_pages(p));
+ return sg_page(p->mm.pages->sgl);
}
void
-fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
+fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
- kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
+ struct page *page = __px_page(p);
+ void *vaddr;
+
+ vaddr = kmap(page);
+ memset64(vaddr, val, count);
+ clflush_cache_range(vaddr, PAGE_SIZE);
+ kunmap(page);
}
-static void poison_scratch_page(struct page *page, unsigned long size)
+static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
- if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- return;
+ struct sgt_iter sgt;
+ struct page *page;
+ u8 val;
- GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+ val = 0;
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ val = POISON_FREE;
- do {
+ for_each_sgt_page(page, sgt, scratch->mm.pages) {
void *vaddr;
vaddr = kmap(page);
- memset(vaddr, POISON_FREE, PAGE_SIZE);
+ memset(vaddr, val, PAGE_SIZE);
kunmap(page);
-
- page = pfn_to_page(page_to_pfn(page) + 1);
- size -= PAGE_SIZE;
- } while (size);
+ }
}
-int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
+int setup_scratch_page(struct i915_address_space *vm)
{
unsigned long size;
@@ -338,21 +179,27 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
*/
size = I915_GTT_PAGE_SIZE_4K;
if (i915_vm_is_4lvl(vm) &&
- HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+ HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
size = I915_GTT_PAGE_SIZE_64K;
- gfp |= __GFP_NOWARN;
- }
- gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
do {
- unsigned int order = get_order(size);
- struct page *page;
- dma_addr_t addr;
+ struct drm_i915_gem_object *obj;
- page = alloc_pages(gfp, order);
- if (unlikely(!page))
+ obj = vm->alloc_pt_dma(vm, size);
+ if (IS_ERR(obj))
goto skip;
+ if (pin_pt_dma(vm, obj))
+ goto skip_obj;
+
+ /* We need a single contiguous page for our scratch */
+ if (obj->mm.page_sizes.sg < size)
+ goto skip_obj;
+
+ /* And it needs to be correspondingly aligned */
+ if (__px_dma(obj) & (size - 1))
+ goto skip_obj;
+
/*
* Use a non-zero scratch page for debugging.
*
@@ -362,61 +209,28 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
* should it ever be accidentally used, the effect should be
* fairly benign.
*/
- poison_scratch_page(page, size);
-
- addr = dma_map_page_attrs(vm->dma,
- page, 0, size,
- PCI_DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_NO_WARN);
- if (unlikely(dma_mapping_error(vm->dma, addr)))
- goto free_page;
-
- if (unlikely(!IS_ALIGNED(addr, size)))
- goto unmap_page;
-
- vm->scratch[0].base.page = page;
- vm->scratch[0].base.daddr = addr;
- vm->scratch_order = order;
+ poison_scratch_page(obj);
+
+ vm->scratch[0] = obj;
+ vm->scratch_order = get_order(size);
return 0;
-unmap_page:
- dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
-free_page:
- __free_pages(page, order);
+skip_obj:
+ i915_gem_object_put(obj);
skip:
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
size = I915_GTT_PAGE_SIZE_4K;
- gfp &= ~__GFP_NOWARN;
} while (1);
}
-void cleanup_scratch_page(struct i915_address_space *vm)
-{
- struct i915_page_dma *p = px_base(&vm->scratch[0]);
- unsigned int order = vm->scratch_order;
-
- dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
- PCI_DMA_BIDIRECTIONAL);
- __free_pages(p->page, order);
-}
-
void free_scratch(struct i915_address_space *vm)
{
int i;
- if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
- return;
-
- for (i = 1; i <= vm->top; i++) {
- if (!px_dma(&vm->scratch[i]))
- break;
- cleanup_page_dma(vm, px_base(&vm->scratch[i]));
- }
-
- cleanup_scratch_page(vm);
+ for (i = 0; i <= vm->top; i++)
+ i915_gem_object_put(vm->scratch[i]);
}
void gtt_write_workarounds(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index f2b75078e05f..c13c650ced22 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -134,38 +134,29 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
+enum i915_cache_level;
+
+struct drm_i915_file_private;
+struct drm_i915_gem_object;
struct i915_fence_reg;
+struct i915_vma;
+struct intel_gt;
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
-struct i915_page_dma {
- struct page *page;
+struct i915_page_table {
+ struct drm_i915_gem_object *base;
union {
- dma_addr_t daddr;
-
- /*
- * For gen6/gen7 only. This is the offset in the GGTT
- * where the page directory entries for PPGTT begin
- */
- u32 ggtt_offset;
+ atomic_t used;
+ struct i915_page_table *stash;
};
};
-struct i915_page_scratch {
- struct i915_page_dma base;
- u64 encode;
-};
-
-struct i915_page_table {
- struct i915_page_dma base;
- atomic_t used;
-};
-
struct i915_page_directory {
struct i915_page_table pt;
spinlock_t lock;
- void *entry[512];
+ void **entry;
};
#define __px_choose_expr(x, type, expr, other) \
@@ -176,12 +167,14 @@ struct i915_page_directory {
other)
#define px_base(px) \
- __px_choose_expr(px, struct i915_page_dma *, __x, \
- __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
- __px_choose_expr(px, struct i915_page_table *, &__x->base, \
- __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
- (void)0))))
-#define px_dma(px) (px_base(px)->daddr)
+ __px_choose_expr(px, struct drm_i915_gem_object *, __x, \
+ __px_choose_expr(px, struct i915_page_table *, __x->base, \
+ __px_choose_expr(px, struct i915_page_directory *, __x->pt.base, \
+ (void)0)))
+
+struct page *__px_page(struct drm_i915_gem_object *p);
+dma_addr_t __px_dma(struct drm_i915_gem_object *p);
+#define px_dma(px) (__px_dma(px_base(px)))
#define px_pt(px) \
__px_choose_expr(px, struct i915_page_table *, __x, \
@@ -189,19 +182,18 @@ struct i915_page_directory {
(void)0))
#define px_used(px) (&px_pt(px)->used)
-enum i915_cache_level;
-
-struct drm_i915_file_private;
-struct drm_i915_gem_object;
-struct i915_vma;
-struct intel_gt;
+struct i915_vm_pt_stash {
+ /* preallocated chains of page tables/directories */
+ struct i915_page_table *pt[2];
+};
struct i915_vma_ops {
/* Map an object into an address space with the given cache flags. */
- int (*bind_vma)(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags);
+ void (*bind_vma)(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
/*
* Unmap an object from an address space. This usually consists of
* setting the valid PTE entries to a reserved scratch page.
@@ -213,13 +205,6 @@ struct i915_vma_ops {
void (*clear_pages)(struct i915_vma *vma);
};
-struct pagestash {
- spinlock_t lock;
- struct pagevec pvec;
-};
-
-void stash_init(struct pagestash *stash);
-
struct i915_address_space {
struct kref ref;
struct rcu_work rcu;
@@ -256,33 +241,33 @@ struct i915_address_space {
#define VM_CLASS_GGTT 0
#define VM_CLASS_PPGTT 1
- struct i915_page_scratch scratch[4];
- unsigned int scratch_order;
- unsigned int top;
-
+ struct drm_i915_gem_object *scratch[4];
/**
* List of vma currently bound.
*/
struct list_head bound_list;
- struct pagestash free_pages;
-
/* Global GTT */
bool is_ggtt:1;
- /* Some systems require uncached updates of the page directories */
- bool pt_kmap_wc:1;
-
/* Some systems support read-only mappings for GGTT and/or PPGTT */
bool has_read_only:1;
+ u8 top;
+ u8 pd_shift;
+ u8 scratch_order;
+
+ struct drm_i915_gem_object *
+ (*alloc_pt_dma)(struct i915_address_space *vm, int sz);
+
u64 (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
u32 flags); /* Create a valid PTE */
#define PTE_READ_ONLY BIT(0)
- int (*allocate_va_range)(struct i915_address_space *vm,
- u64 start, u64 length);
+ void (*allocate_va_range)(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
@@ -490,9 +475,9 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
{
- struct i915_page_dma *pt = ppgtt->pd->entry[n];
+ struct i915_page_table *pt = ppgtt->pd->entry[n];
- return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
+ return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
}
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
@@ -517,13 +502,10 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
-int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
-void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
-
-#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px)))
void
-fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
+fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
#define fill32_px(px, v) do { \
@@ -531,47 +513,51 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
fill_px((px), v__ << 32 | v__); \
} while (0)
-int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
-void cleanup_scratch_page(struct i915_address_space *vm);
+int setup_scratch_page(struct i915_address_space *vm);
void free_scratch(struct i915_address_space *vm);
+struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
struct i915_page_table *alloc_pt(struct i915_address_space *vm);
struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
-struct i915_page_directory *__alloc_pd(size_t sz);
+struct i915_page_directory *__alloc_pd(int npde);
-void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
+int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
-#define free_px(vm, px) free_pd(vm, px_base(px))
+void free_px(struct i915_address_space *vm,
+ struct i915_page_table *pt, int lvl);
+#define free_pt(vm, px) free_px(vm, px, 0)
+#define free_pd(vm, px) free_px(vm, px_pt(px), 1)
void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- struct i915_page_dma * const to,
+ struct i915_page_table *pt,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
#define set_pd_entry(pd, idx, to) \
- __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+ __set_pd_entry((pd), (idx), px_pt(to), gen8_pde_encode)
void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- const struct i915_page_scratch * const scratch);
+ const struct drm_i915_gem_object * const scratch);
bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
- const struct i915_page_scratch * const scratch);
+ const struct drm_i915_gem_object * const scratch);
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
int ggtt_set_pages(struct i915_vma *vma);
int ppgtt_set_pages(struct i915_vma *vma);
void clear_pages(struct i915_vma *vma);
-int ppgtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags);
+void ppgtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma *vma);
@@ -579,6 +565,14 @@ void gtt_write_workarounds(struct intel_gt *gt);
void setup_private_pat(struct intel_uncore *uncore);
+int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 size);
+int i915_vm_pin_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash);
+void i915_vm_free_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash);
+
static inline struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9eeaca957a7e..0412a44f25f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -137,6 +137,7 @@
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
@@ -1148,20 +1149,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
} else {
struct intel_engine_cs *owner = rq->context->engine;
- /*
- * Decouple the virtual breadcrumb before moving it
- * back to the virtual engine -- we don't want the
- * request to complete in the background and try
- * and cancel the breadcrumb on the virtual engine
- * (instead of the old engine where it is linked)!
- */
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &rq->fence.flags)) {
- spin_lock_nested(&rq->lock,
- SINGLE_DEPTH_NESTING);
- i915_request_cancel_breadcrumb(rq);
- spin_unlock(&rq->lock);
- }
WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
@@ -1819,16 +1806,31 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
+static void virtual_xfer_context(struct virtual_engine *ve,
+ struct intel_engine_cs *engine)
{
+ unsigned int n;
+
+ if (likely(engine == ve->siblings[0]))
+ return;
+
+ GEM_BUG_ON(READ_ONCE(ve->context.inflight));
+ if (!intel_engine_has_relative_mmio(engine))
+ virtual_update_register_offsets(ve->context.lrc_reg_state,
+ engine);
+
/*
- * All the outstanding signals on ve->siblings[0] must have
- * been completed, just pending the interrupt handler. As those
- * signals still refer to the old sibling (via rq->engine), we must
- * transfer those to the old irq_worker to keep our locking
- * consistent.
+ * Move the bound engine to the top of the list for
+ * future execution. We then kick this tasklet first
+ * before checking others, so that we preferentially
+ * reuse this set of bound registers.
*/
- intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
+ for (n = 1; n < ve->num_siblings; n++) {
+ if (ve->siblings[n] == engine) {
+ swap(ve->siblings[n], ve->siblings[0]);
+ break;
+ }
+ }
}
#define for_each_waiter(p__, rq__) \
@@ -2279,38 +2281,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
WRITE_ONCE(rq->engine, engine);
- if (engine != ve->siblings[0]) {
- u32 *regs = ve->context.lrc_reg_state;
- unsigned int n;
-
- GEM_BUG_ON(READ_ONCE(ve->context.inflight));
-
- if (!intel_engine_has_relative_mmio(engine))
- virtual_update_register_offsets(regs,
- engine);
-
- if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve);
-
+ if (__i915_request_submit(rq)) {
/*
- * Move the bound engine to the top of the list
- * for future execution. We then kick this
- * tasklet first before checking others, so that
- * we preferentially reuse this set of bound
- * registers.
+ * Only after we confirm that we will submit
+ * this request (i.e. it has not already
+ * completed), do we want to update the context.
+ *
+ * This serves two purposes. It avoids
+ * unnecessary work if we are resubmitting an
+ * already completed request after timeslicing.
+ * But more importantly, it prevents us altering
+ * ve->siblings[] on an idle context, where
+ * we may be using ve->siblings[] in
+ * virtual_context_enter / virtual_context_exit.
*/
- for (n = 1; n < ve->num_siblings; n++) {
- if (ve->siblings[n] == engine) {
- swap(ve->siblings[n],
- ve->siblings[0]);
- break;
- }
- }
-
+ virtual_xfer_context(ve, engine);
GEM_BUG_ON(ve->siblings[0] != engine);
- }
- if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
@@ -3316,7 +3303,10 @@ static void execlists_context_unpin(struct intel_context *ce)
{
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
+}
+static void execlists_context_post_unpin(struct intel_context *ce)
+{
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -3478,20 +3468,24 @@ __execlists_update_reg_state(const struct intel_context *ce,
}
static int
-__execlists_context_pin(struct intel_context *ce,
- struct intel_engine_cs *engine)
+execlists_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww, void **vaddr)
{
- void *vaddr;
-
GEM_BUG_ON(!ce->state);
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
- vaddr = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(engine->i915) |
+ *vaddr = i915_gem_object_pin_map(ce->state->obj,
+ i915_coherent_map_type(ce->engine->i915) |
I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
+ return PTR_ERR_OR_ZERO(*vaddr);
+}
+
+static int
+__execlists_context_pin(struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ void *vaddr)
+{
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3499,9 +3493,9 @@ __execlists_context_pin(struct intel_context *ce,
return 0;
}
-static int execlists_context_pin(struct intel_context *ce)
+static int execlists_context_pin(struct intel_context *ce, void *vaddr)
{
- return __execlists_context_pin(ce, ce->engine);
+ return __execlists_context_pin(ce, ce->engine, vaddr);
}
static int execlists_context_alloc(struct intel_context *ce)
@@ -3527,8 +3521,10 @@ static void execlists_context_reset(struct intel_context *ce)
static const struct intel_context_ops execlists_context_ops = {
.alloc = execlists_context_alloc,
+ .pre_pin = execlists_context_pre_pin,
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
+ .post_unpin = execlists_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@@ -3892,7 +3888,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_ggtt_pin(vma, 0, PIN_HIGH);
+ err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err)
goto err;
@@ -4133,7 +4129,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
{
intel_mocs_init_engine(engine);
- intel_engine_reset_breadcrumbs(engine);
+ intel_breadcrumbs_reset(engine->breadcrumbs);
if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
struct drm_printer p = drm_debug_printer(__func__);
@@ -4562,7 +4558,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
vf_flush_wa = true;
/* WaForGAMHang:kbl */
- if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0))
+ if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
}
@@ -4764,14 +4760,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;
+ cmd = 4;
+ if (mode & EMIT_INVALIDATE)
+ cmd += 2;
if (mode & EMIT_INVALIDATE)
aux_inv = request->engine->mask & ~BIT(BCS0);
+ if (aux_inv)
+ cmd += 2 * hweight8(aux_inv) + 2;
- cs = intel_ring_begin(request,
- 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
+ cs = intel_ring_begin(request, cmd);
if (IS_ERR(cs))
return PTR_ERR(cs);
+ if (mode & EMIT_INVALIDATE)
+ *cs++ = preparser_disable(true);
+
cmd = MI_FLUSH_DW + 1;
/* We always require a command barrier so that subsequent
@@ -4804,6 +4807,10 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = MI_NOOP;
}
+
+ if (mode & EMIT_INVALIDATE)
+ *cs++ = preparser_disable(false);
+
intel_ring_advance(request, cs);
return 0;
@@ -5302,6 +5309,14 @@ populate_lr_context(struct intel_context *ce,
return 0;
}
+static struct intel_timeline *pinned_timeline(struct intel_context *ce)
+{
+ struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
+
+ return intel_timeline_create_from_engine(ce->engine,
+ page_unmask_bits(tl));
+}
+
static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@@ -5332,19 +5347,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
goto error_deref_obj;
}
- if (!ce->timeline) {
+ if (!page_mask_bits(ce->timeline)) {
struct intel_timeline *tl;
- struct i915_vma *hwsp;
/*
* Use the static global HWSP for the kernel context, and
* a dynamically allocated cacheline for everyone else.
*/
- hwsp = NULL;
- if (unlikely(intel_context_is_barrier(ce)))
- hwsp = engine->status_page.vma;
-
- tl = intel_timeline_create(engine->gt, hwsp);
+ if (unlikely(ce->timeline))
+ tl = pinned_timeline(ce);
+ else
+ tl = intel_timeline_create(engine->gt);
if (IS_ERR(tl)) {
ret = PTR_ERR(tl);
goto error_deref_obj;
@@ -5450,12 +5463,12 @@ static int virtual_context_alloc(struct intel_context *ce)
return __execlists_context_alloc(ce, ve->siblings[0]);
}
-static int virtual_context_pin(struct intel_context *ce)
+static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
/* Note: we must use a real engine class for setting up reg state */
- return __execlists_context_pin(ce, ve->siblings[0]);
+ return __execlists_context_pin(ce, ve->siblings[0], vaddr);
}
static void virtual_context_enter(struct intel_context *ce)
@@ -5483,8 +5496,10 @@ static void virtual_context_exit(struct intel_context *ce)
static const struct intel_context_ops virtual_context_ops = {
.alloc = virtual_context_alloc,
+ .pre_pin = execlists_context_pre_pin,
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
+ .post_unpin = execlists_context_post_unpin,
.enter = virtual_context_enter,
.exit = virtual_context_exit,
@@ -5718,9 +5733,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
- intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
- ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
@@ -5737,6 +5750,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
intel_context_init(&ve->context, &ve->base);
+ ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
+ if (!ve->base.breadcrumbs) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index f0862e924d11..46d9aceda64c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -18,7 +18,8 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
if (unlikely(!pt))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_page_dma(vm, &pt->base))) {
+ pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(pt->base)) {
kfree(pt);
return ERR_PTR(-ENOMEM);
}
@@ -27,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return pt;
}
-struct i915_page_directory *__alloc_pd(size_t sz)
+struct i915_page_directory *__alloc_pd(int count)
{
struct i915_page_directory *pd;
- pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+ pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd))
return NULL;
+ pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pd->entry)) {
+ kfree(pd);
+ return NULL;
+ }
+
spin_lock_init(&pd->lock);
return pd;
}
@@ -43,11 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
- pd = __alloc_pd(sizeof(*pd));
+ pd = __alloc_pd(I915_PDES);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(pd->pt.base)) {
+ kfree(pd->entry);
kfree(pd);
return ERR_PTR(-ENOMEM);
}
@@ -55,41 +64,52 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return pd;
}
-void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
+void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
{
- cleanup_page_dma(vm, pd);
- kfree(pd);
+ BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));
+
+ if (lvl) {
+ struct i915_page_directory *pd =
+ container_of(pt, typeof(*pd), pt);
+ kfree(pd->entry);
+ }
+
+ if (pt->base)
+ i915_gem_object_put(pt->base);
+
+ kfree(pt);
}
static inline void
-write_dma_entry(struct i915_page_dma * const pdma,
+write_dma_entry(struct drm_i915_gem_object * const pdma,
const unsigned short idx,
const u64 encoded_entry)
{
- u64 * const vaddr = kmap_atomic(pdma->page);
+ u64 * const vaddr = kmap_atomic(__px_page(pdma));
vaddr[idx] = encoded_entry;
+ clflush_cache_range(&vaddr[idx], sizeof(u64));
kunmap_atomic(vaddr);
}
void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- struct i915_page_dma * const to,
+ struct i915_page_table * const to,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
/* Each thread pre-pins the pd, and we may have a thread per pde. */
- GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+ GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);
atomic_inc(px_used(pd));
pd->entry[idx] = to;
- write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
+ write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC));
}
void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
- const struct i915_page_scratch * const scratch)
+ const struct drm_i915_gem_object * const scratch)
{
GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
@@ -102,7 +122,7 @@ bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
- const struct i915_page_scratch * const scratch)
+ const struct drm_i915_gem_object * const scratch)
{
bool free = false;
@@ -155,19 +175,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
return ppgtt;
}
-int ppgtt_bind_vma(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+void ppgtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
u32 pte_flags;
- int err;
if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
- err = vm->allocate_va_range(vm, vma->node.start, vma->size);
- if (err)
- return err;
-
+ vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
}
@@ -178,8 +195,6 @@ int ppgtt_bind_vma(struct i915_address_space *vm,
vm->insert_entries(vm, vma, cache_level, pte_flags);
wmb();
-
- return 0;
}
void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@@ -188,12 +203,93 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
vm->clear_range(vm, vma->node.start, vma->size);
}
+static unsigned long pd_count(u64 size, int shift)
+{
+ /* Beware later misalignment */
+ return (size + 2 * (BIT_ULL(shift) - 1)) >> shift;
+}
+
+int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 size)
+{
+ unsigned long count;
+ int shift, n;
+
+ shift = vm->pd_shift;
+ if (!shift)
+ return 0;
+
+ count = pd_count(size, shift);
+ while (count--) {
+ struct i915_page_table *pt;
+
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ i915_vm_free_pt_stash(vm, stash);
+ return PTR_ERR(pt);
+ }
+
+ pt->stash = stash->pt[0];
+ stash->pt[0] = pt;
+ }
+
+ for (n = 1; n < vm->top; n++) {
+ shift += ilog2(I915_PDES); /* Each PD holds 512 entries */
+ count = pd_count(size, shift);
+ while (count--) {
+ struct i915_page_directory *pd;
+
+ pd = alloc_pd(vm);
+ if (IS_ERR(pd)) {
+ i915_vm_free_pt_stash(vm, stash);
+ return PTR_ERR(pd);
+ }
+
+ pd->pt.stash = stash->pt[1];
+ stash->pt[1] = &pd->pt;
+ }
+ }
+
+ return 0;
+}
+
+int i915_vm_pin_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash)
+{
+ struct i915_page_table *pt;
+ int n, err;
+
+ for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
+ for (pt = stash->pt[n]; pt; pt = pt->stash) {
+ err = pin_pt_dma(vm, pt->base);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void i915_vm_free_pt_stash(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash)
+{
+ struct i915_page_table *pt;
+ int n;
+
+ for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
+ while ((pt = stash->pt[n])) {
+ stash->pt[n] = pt->stash;
+ free_px(vm, pt, n);
+ }
+ }
+}
+
int ppgtt_set_pages(struct i915_vma *vma)
{
GEM_BUG_ON(vma->pages);
vma->pages = vma->obj->mm.pages;
-
vma->page_sizes = vma->obj->mm.page_sizes;
return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 1bfad589c63b..ea2a77c7b469 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
+#include "gt/intel_context.h"
#include "intel_ring.h"
static const struct intel_renderstate_rodata *
@@ -157,33 +158,47 @@ out:
#undef OUT_BATCH
int intel_renderstate_init(struct intel_renderstate *so,
- struct intel_engine_cs *engine)
+ struct intel_context *ce)
{
- struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine = ce->engine;
+ struct drm_i915_gem_object *obj = NULL;
int err;
memset(so, 0, sizeof(*so));
so->rodata = render_state_get_rodata(engine);
- if (!so->rodata)
- return 0;
+ if (so->rodata) {
+ if (so->rodata->batch_items * 4 > PAGE_SIZE)
+ return -EINVAL;
+
+ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(so->vma)) {
+ err = PTR_ERR(so->vma);
+ goto err_obj;
+ }
+ }
- if (so->rodata->batch_items * 4 > PAGE_SIZE)
- return -EINVAL;
+ i915_gem_ww_ctx_init(&so->ww, true);
+retry:
+ err = intel_context_pin_ww(ce, &so->ww);
+ if (err)
+ goto err_fini;
- obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ /* return early if there's nothing to setup */
+ if (!err && !so->rodata)
+ return 0;
- so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
- if (IS_ERR(so->vma)) {
- err = PTR_ERR(so->vma);
- goto err_obj;
- }
+ err = i915_gem_object_lock(so->vma->obj, &so->ww);
+ if (err)
+ goto err_context;
err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
- goto err_obj;
+ goto err_context;
err = render_state_setup(so, engine->i915);
if (err)
@@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so,
err_unpin:
i915_vma_unpin(so->vma);
+err_context:
+ intel_context_unpin(ce);
+err_fini:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&so->ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&so->ww);
err_obj:
- i915_gem_object_put(obj);
+ if (obj)
+ i915_gem_object_put(obj);
so->vma = NULL;
return err;
}
@@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
- i915_vma_lock(so->vma);
err = i915_request_await_object(rq, so->vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(so->vma, rq, 0);
- i915_vma_unlock(so->vma);
if (err)
return err;
@@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return 0;
}
-void intel_renderstate_fini(struct intel_renderstate *so)
+void intel_renderstate_fini(struct intel_renderstate *so,
+ struct intel_context *ce)
{
- i915_vma_unpin_and_release(&so->vma, 0);
+ if (so->vma) {
+ i915_vma_unpin(so->vma);
+ i915_vma_close(so->vma);
+ }
+
+ intel_context_unpin(ce);
+ i915_gem_ww_ctx_fini(&so->ww);
+
+ if (so->vma)
+ i915_gem_object_put(so->vma->obj);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
index 5700be69a05a..713aa1e86c80 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -25,9 +25,10 @@
#define _INTEL_RENDERSTATE_H_
#include <linux/types.h>
+#include "i915_gem.h"
struct i915_request;
-struct intel_engine_cs;
+struct intel_context;
struct i915_vma;
struct intel_renderstate_rodata {
@@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state;
extern const struct intel_renderstate_rodata gen9_null_state;
struct intel_renderstate {
+ struct i915_gem_ww_ctx ww;
const struct intel_renderstate_rodata *rodata;
struct i915_vma *vma;
u32 batch_offset;
@@ -58,9 +60,10 @@ struct intel_renderstate {
};
int intel_renderstate_init(struct intel_renderstate *so,
- struct intel_engine_cs *engine);
+ struct intel_context *ce);
int intel_renderstate_emit(struct intel_renderstate *so,
struct i915_request *rq);
-void intel_renderstate_fini(struct intel_renderstate *so);
+void intel_renderstate_fini(struct intel_renderstate *so,
+ struct intel_context *ce);
#endif /* _INTEL_RENDERSTATE_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 46a5ceffc22f..ac36b67fb46b 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -15,6 +15,7 @@
#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
+#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index bdb324167ef3..4034a4bac7f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
return space;
}
-int intel_ring_pin(struct intel_ring *ring)
+void __intel_ring_pin(struct intel_ring *ring)
+{
+ GEM_BUG_ON(!atomic_read(&ring->pin_count));
+ atomic_inc(&ring->pin_count);
+}
+
+int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
{
struct i915_vma *vma = ring->vma;
unsigned int flags;
@@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring)
else
flags |= PIN_HIGH;
- ret = i915_ggtt_pin(vma, 0, flags);
+ ret = i915_ggtt_pin(vma, ww, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index cc0ebca65167..1700579bdc93 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
-int intel_ring_pin(struct intel_ring *ring);
+void __intel_ring_pin(struct intel_ring *ring);
+int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 898593ca4889..16b48e72c369 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -32,6 +32,7 @@
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_reset.h"
@@ -201,16 +202,18 @@ static struct i915_address_space *vm_alias(struct i915_address_space *vm)
return vm;
}
+static u32 pp_dir(struct i915_address_space *vm)
+{
+ return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
+}
+
static void set_pp_dir(struct intel_engine_cs *engine)
{
struct i915_address_space *vm = vm_alias(engine->gt->vm);
if (vm) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
- ENGINE_WRITE(engine, RING_PP_DIR_BASE,
- px_base(ppgtt->pd)->ggtt_offset << 10);
+ ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm));
}
}
@@ -255,7 +258,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
else
ring_setup_status_page(engine);
- intel_engine_reset_breadcrumbs(engine);
+ intel_breadcrumbs_reset(engine->breadcrumbs);
/* Enforce ordering by reading HEAD register back */
ENGINE_POSTING_READ(engine, RING_HEAD);
@@ -474,14 +477,16 @@ static void ring_context_destroy(struct kref *ref)
intel_context_free(ce);
}
-static int __context_pin_ppgtt(struct intel_context *ce)
+static int ring_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
+ void **unused)
{
struct i915_address_space *vm;
int err = 0;
vm = vm_alias(ce->vm);
if (vm)
- err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
+ err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
return err;
}
@@ -497,6 +502,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
static void ring_context_unpin(struct intel_context *ce)
{
+}
+
+static void ring_context_post_unpin(struct intel_context *ce)
+{
__context_unpin_ppgtt(ce);
}
@@ -584,9 +593,9 @@ static int ring_context_alloc(struct intel_context *ce)
return 0;
}
-static int ring_context_pin(struct intel_context *ce)
+static int ring_context_pin(struct intel_context *ce, void *unused)
{
- return __context_pin_ppgtt(ce);
+ return 0;
}
static void ring_context_reset(struct intel_context *ce)
@@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce)
static const struct intel_context_ops ring_context_ops = {
.alloc = ring_context_alloc,
+ .pre_pin = ring_context_pre_pin,
.pin = ring_context_pin,
.unpin = ring_context_unpin,
+ .post_unpin = ring_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@@ -608,7 +619,7 @@ static const struct intel_context_ops ring_context_ops = {
};
static int load_pd_dir(struct i915_request *rq,
- const struct i915_ppgtt *ppgtt,
+ struct i915_address_space *vm,
u32 valid)
{
const struct intel_engine_cs * const engine = rq->engine;
@@ -624,7 +635,7 @@ static int load_pd_dir(struct i915_request *rq,
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+ *cs++ = pp_dir(vm);
/* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@ -826,7 +837,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
* post-sync op, this extra pass appears vital before a
* mm switch!
*/
- ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+ ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
if (ret)
return ret;
@@ -1250,14 +1261,15 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
return -ENODEV;
}
- timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
+ timeline = intel_timeline_create_from_engine(engine,
+ I915_GEM_HWS_SEQNO_ADDR);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
goto err;
}
GEM_BUG_ON(timeline->has_initial_breadcrumb);
- err = intel_timeline_pin(timeline);
+ err = intel_timeline_pin(timeline, NULL);
if (err)
goto err_timeline;
@@ -1267,7 +1279,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
goto err_timeline_unpin;
}
- err = intel_ring_pin(ring);
+ err = intel_ring_pin(ring, NULL);
if (err)
goto err_ring;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 97ba14ad52e4..e6a00eea0631 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -7,6 +7,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
+#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 46d20f5f3ddc..a2f74cefe4c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -215,7 +215,8 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
static int intel_timeline_init(struct intel_timeline *timeline,
struct intel_gt *gt,
- struct i915_vma *hwsp)
+ struct i915_vma *hwsp,
+ unsigned int offset)
{
void *vaddr;
@@ -246,8 +247,7 @@ static int intel_timeline_init(struct intel_timeline *timeline,
vaddr = page_mask_bits(cl->vaddr);
} else {
- timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
-
+ timeline->hwsp_offset = offset;
vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -297,7 +297,9 @@ static void intel_timeline_fini(struct intel_timeline *timeline)
}
struct intel_timeline *
-intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
+__intel_timeline_create(struct intel_gt *gt,
+ struct i915_vma *global_hwsp,
+ unsigned int offset)
{
struct intel_timeline *timeline;
int err;
@@ -306,7 +308,7 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
if (!timeline)
return ERR_PTR(-ENOMEM);
- err = intel_timeline_init(timeline, gt, global_hwsp);
+ err = intel_timeline_init(timeline, gt, global_hwsp, offset);
if (err) {
kfree(timeline);
return ERR_PTR(err);
@@ -315,14 +317,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
return timeline;
}
-int intel_timeline_pin(struct intel_timeline *tl)
+void __intel_timeline_pin(struct intel_timeline *tl)
+{
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ atomic_inc(&tl->pin_count);
+}
+
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
int err;
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
if (err)
return err;
@@ -465,7 +473,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_ggtt_pin(vma, 0, PIN_HIGH);
+ err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
@@ -484,7 +492,9 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
* free it after the current request is retired, which ensures that
* all writes into the cacheline from previous requests are complete.
*/
- err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
+ err = i915_active_ref(&tl->hwsp_cacheline->active,
+ tl->fence_context,
+ &rq->fence);
if (err)
goto err_cacheline;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 4298b9ac7327..9882cd911d8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -29,10 +29,27 @@
#include "i915_active.h"
#include "i915_syncmap.h"
-#include "gt/intel_timeline_types.h"
+#include "intel_timeline_types.h"
struct intel_timeline *
-intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
+__intel_timeline_create(struct intel_gt *gt,
+ struct i915_vma *global_hwsp,
+ unsigned int offset);
+
+static inline struct intel_timeline *
+intel_timeline_create(struct intel_gt *gt)
+{
+ return __intel_timeline_create(gt, NULL, 0);
+}
+
+static inline struct intel_timeline *
+intel_timeline_create_from_engine(struct intel_engine_cs *engine,
+ unsigned int offset)
+{
+ return __intel_timeline_create(engine->gt,
+ engine->status_page.vma,
+ offset);
+}
static inline struct intel_timeline *
intel_timeline_get(struct intel_timeline *timeline)
@@ -71,7 +88,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
}
-int intel_timeline_pin(struct intel_timeline *tl);
+void __intel_timeline_pin(struct intel_timeline *tl);
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww);
void intel_timeline_enter(struct intel_timeline *tl);
int intel_timeline_get_seqno(struct intel_timeline *tl,
struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5726cd0a37e0..6c580d0d9ea8 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -52,6 +52,37 @@
* - Public functions to init or apply the given workaround type.
*/
+/*
+ * KBL revision ID ordering is bizarre; higher revision ID's map to lower
+ * steppings in some cases. So rather than test against the revision ID
+ * directly, let's map that into our own range of increasing ID's that we
+ * can test against in a regular manner.
+ */
+
+const struct i915_rev_steppings kbl_revids[] = {
+ [0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 },
+ [1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 },
+ [2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 },
+ [3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 },
+ [4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 },
+ [5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 },
+ [6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 },
+ [7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 },
+};
+
+const struct i915_rev_steppings tgl_uy_revids[] = {
+ [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_A0 },
+ [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_C0 },
+ [2] = { .gt_stepping = TGL_REVID_B1, .disp_stepping = TGL_REVID_C0 },
+ [3] = { .gt_stepping = TGL_REVID_C0, .disp_stepping = TGL_REVID_D0 },
+};
+
+/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
+const struct i915_rev_steppings tgl_revids[] = {
+ [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_B0 },
+ [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_D0 },
+};
+
static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
wal->name = name;
@@ -470,7 +501,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal);
/* WaToEnableHwFixForPushConstHWBug:kbl */
- if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
+ if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
@@ -596,8 +627,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}
-static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
/*
* Wa_1409142259:tgl
@@ -607,12 +638,28 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
* Wa_1409207793:tgl
* Wa_1409178076:tgl
* Wa_1408979724:tgl
+ * Wa_14010443199:rkl
+ * Wa_14010698770:rkl
*/
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+ /* WaDisableGPGPUMidThreadPreemption:gen12 */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+ GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
+}
+
+static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ gen12_ctx_workarounds_init(engine, wal);
+
/*
- * Wa_1604555607:gen12 and Wa_1608008084:gen12
+ * Wa_1604555607:tgl,rkl
+ *
+ * Note that the implementation of this workaround is further modified
+ * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
* FF_MODE2 register will return the wrong value when read. The default
* value for this register is zero for all fields and there are no bit
* masks. So instead of doing a RMW we should just write the GS Timer
@@ -623,11 +670,6 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128,
0);
-
- /* WaDisableGPGPUMidThreadPreemption:tgl */
- WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
- GEN9_PREEMPT_GPGPU_LEVEL_MASK,
- GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}
static void
@@ -642,8 +684,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
wa_init_start(wal, name, engine->name);
- if (IS_GEN(i915, 12))
+ if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915))
tgl_ctx_workarounds_init(engine, wal);
+ else if (IS_GEN(i915, 12))
+ gen12_ctx_workarounds_init(engine, wal);
else if (IS_GEN(i915, 11))
icl_ctx_workarounds_init(engine, wal);
else if (IS_CANNONLAKE(i915))
@@ -995,7 +1039,7 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
gen9_gt_workarounds_init(i915, wal);
/* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
+ if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0))
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -1176,18 +1220,25 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+gen12_gt_workarounds_init(struct drm_i915_private *i915,
+ struct i915_wa_list *wal)
{
wa_init_mcr(i915, wal);
+}
+
+static void
+tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ gen12_gt_workarounds_init(i915, wal);
/* Wa_1409420604:tgl */
- if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SUBSLICE_UNIT_LEVEL_CLKGATE2,
CPSSUNIT_CLKGATE_DIS);
/* Wa_1607087056:tgl also know as BUG:1409180338 */
- if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1196,8 +1247,10 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
- if (IS_GEN(i915, 12))
+ if (IS_TIGERLAKE(i915))
tgl_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 12))
+ gen12_gt_workarounds_init(i915, wal);
else if (IS_GEN(i915, 11))
icl_gt_workarounds_init(i915, wal);
else if (IS_CANNONLAKE(i915))
@@ -1620,7 +1673,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
/*
* Wa_1607138336:tgl
* Wa_1607063988:tgl
@@ -1630,18 +1683,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
/*
- * Wa_1607030317:tgl
- * Wa_1607186500:tgl
- * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2
- * of then says it is fixed on B0 the other one says it is
- * permanent
- */
- wa_masked_en(wal,
- GEN6_RC_SLEEP_PSMI_CONTROL,
- GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
- GEN8_RC_SEMA_IDLE_MSG_DISABLE);
-
- /*
* Wa_1606679103:tgl
* (see also Wa_1606682166:icl)
*/
@@ -1654,22 +1695,17 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
VSUNIT_CLKGATE_DIS_TGL);
}
- if (IS_TIGERLAKE(i915)) {
- /* Wa_1606931601:tgl */
+ if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /* Wa_1606931601:tgl,rkl */
wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
- /* Wa_1409804808:tgl */
+ /* Wa_1409804808:tgl,rkl */
wa_masked_en(wal, GEN7_ROW_CHICKEN2,
GEN12_PUSH_CONST_DEREF_HOLD_DIS);
- /* Wa_1606700617:tgl */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
-
/*
* Wa_1409085225:tgl
- * Wa_14010229206:tgl
+ * Wa_14010229206:tgl,rkl
*/
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
@@ -1677,9 +1713,37 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_1407928979:tgl A*
* Wa_18011464164:tgl B0+
* Wa_22010931296:tgl B0+
+ * Wa_14010919138:rkl,tgl
*/
wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+
+ /*
+ * Wa_1607030317:tgl
+ * Wa_1607186500:tgl
+ * Wa_1607297627:tgl,rkl there are multiple entries for this
+ * WA in the BSpec; some indicate this is an A0-only WA,
+ * others indicate it applies to all steppings.
+ */
+ wa_masked_en(wal,
+ GEN6_RC_SLEEP_PSMI_CONTROL,
+ GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+
+ /*
+ * Wa_1606700617:tgl
+ * Wa_22010271021:tgl,rkl
+ */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+ }
+
+ if (IS_GEN(i915, 12)) {
+ /* Wa_1406941453:gen12 */
+ wa_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ ENABLE_SMALLPL);
}
if (IS_GEN(i915, 11)) {
@@ -1898,7 +1962,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
/* WaKBLVECSSemaphoreWaitPoll:kbl */
- if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
+ if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
wa_write(wal,
RING_SEMA_WAIT_POLL(engine->mmio_base),
1);
@@ -2045,6 +2109,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
const struct i915_wa *wa;
struct i915_request *rq;
struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
unsigned int i;
u32 *results;
int err;
@@ -2057,29 +2122,34 @@ static int engine_wa_list_verify(struct intel_context *ce,
return PTR_ERR(vma);
intel_engine_pm_get(ce->engine);
- rq = intel_context_create_request(ce);
- intel_engine_pm_put(ce->engine);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(vma->obj, &ww);
+ if (err == 0)
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto err_pm;
+
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_vma;
+ goto err_unpin;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
- if (err) {
- i915_request_add(rq);
- goto err_vma;
- }
-
- err = wa_list_srm(rq, wal, vma);
- if (err)
- goto err_vma;
+ if (err == 0)
+ err = wa_list_srm(rq, wal, vma);
i915_request_get(rq);
+ if (err)
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
+
+ if (err)
+ goto err_rq;
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
goto err_rq;
@@ -2104,7 +2174,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
err_rq:
i915_request_put(rq);
-err_vma:
+err_unpin:
+ intel_context_unpin(ce);
+err_pm:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
i915_vma_unpin(vma);
i915_vma_put(vma);
return err;
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index b8dd3cbc8696..dfd1cfb8a7ec 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce)
{
}
+static void mock_context_post_unpin(struct intel_context *ce)
+{
+}
+
static void mock_context_destroy(struct kref *ref)
{
struct intel_context *ce = container_of(ref, typeof(*ce), ref);
@@ -152,8 +156,7 @@ static int mock_context_alloc(struct intel_context *ce)
if (!ce->ring)
return -ENOMEM;
- GEM_BUG_ON(ce->timeline);
- ce->timeline = intel_timeline_create(ce->engine->gt, NULL);
+ ce->timeline = intel_timeline_create(ce->engine->gt);
if (IS_ERR(ce->timeline)) {
kfree(ce->engine);
return PTR_ERR(ce->timeline);
@@ -164,7 +167,13 @@ static int mock_context_alloc(struct intel_context *ce)
return 0;
}
-static int mock_context_pin(struct intel_context *ce)
+static int mock_context_pre_pin(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww, void **unused)
+{
+ return 0;
+}
+
+static int mock_context_pin(struct intel_context *ce, void *unused)
{
return 0;
}
@@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce)
static const struct intel_context_ops mock_context_ops = {
.alloc = mock_context_alloc,
+ .pre_pin = mock_context_pre_pin,
.pin = mock_context_pin,
.unpin = mock_context_unpin,
+ .post_unpin = mock_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@@ -261,11 +272,12 @@ static void mock_engine_release(struct intel_engine_cs *engine)
GEM_BUG_ON(timer_pending(&mock->hw_delay));
+ intel_breadcrumbs_free(engine->breadcrumbs);
+
intel_context_unpin(engine->kernel_context);
intel_context_put(engine->kernel_context);
intel_engine_fini_retire(engine);
- intel_engine_fini_breadcrumbs(engine);
}
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -323,20 +335,26 @@ int mock_engine_init(struct intel_engine_cs *engine)
struct intel_context *ce;
intel_engine_init_active(engine, ENGINE_MOCK);
- intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
+ engine->breadcrumbs = intel_breadcrumbs_create(NULL);
+ if (!engine->breadcrumbs)
+ return -ENOMEM;
+
ce = create_kernel_context(engine);
if (IS_ERR(ce))
goto err_breadcrumbs;
+ /* We insist the kernel context is using the status_page */
+ engine->status_page.vma = ce->timeline->hwsp_ggtt;
+
engine->kernel_context = ce;
return 0;
err_breadcrumbs:
- intel_engine_fini_breadcrumbs(engine);
+ intel_breadcrumbs_free(engine->breadcrumbs);
return -ENOMEM;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 52af1cee9a94..1f4020e906a8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -68,6 +68,8 @@ static int context_sync(struct intel_context *ce)
} while (!err);
mutex_unlock(&tl->mutex);
+ /* Wait for all barriers to complete (remote CPU) before we check */
+ i915_active_unlock_wait(&ce->active);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 73243ba59c7d..e73854dd2fe0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -47,7 +47,10 @@ static int pulse_active(struct i915_active *active)
static void pulse_free(struct kref *kref)
{
- kfree(container_of(kref, struct pulse, kref));
+ struct pulse *p = container_of(kref, typeof(*p), kref);
+
+ i915_active_fini(&p->active);
+ kfree(p);
}
static void pulse_put(struct pulse *p)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 3fc5de961280..95d41c01d0e0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2729,7 +2729,7 @@ static int create_gang(struct intel_engine_cs *engine,
i915_gem_object_put(obj);
intel_context_put(ce);
- rq->client_link.next = &(*prev)->client_link;
+ rq->mock.link.next = &(*prev)->mock.link;
*prev = rq;
return 0;
@@ -2970,8 +2970,7 @@ static int live_preempt_gang(void *arg)
}
while (rq) { /* wait for each rq from highest to lowest prio */
- struct i915_request *n =
- list_next_entry(rq, client_link);
+ struct i915_request *n = list_next_entry(rq, mock.link);
if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
@@ -3090,7 +3089,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
return vma;
}
- err = i915_ggtt_pin(vma, 0, 0);
+ err = i915_ggtt_pin(vma, NULL, 0, 0);
if (err) {
i915_vma_put(vma);
return ERR_PTR(err);
@@ -4997,6 +4996,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
{
struct intel_context *ce;
struct i915_request *rq;
+ struct i915_gem_ww_ctx ww;
enum {
RING_START_IDX = 0,
RING_TAIL_IDX,
@@ -5011,7 +5011,11 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = intel_context_pin(ce);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(scratch->obj, &ww);
+ if (!err)
+ err = intel_context_pin_ww(ce, &ww);
if (err)
goto err_put;
@@ -5040,11 +5044,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
*cs++ = 0;
- i915_vma_lock(scratch);
err = i915_request_await_object(rq, scratch->obj, true);
if (!err)
err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(scratch);
i915_request_get(rq);
i915_request_add(rq);
@@ -5081,6 +5083,12 @@ err_rq:
err_unpin:
intel_context_unpin(ce);
err_put:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
intel_context_put(ce);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 8624f5d2a1f3..3540ba9bd459 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine,
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
- i915_gem_object_put(obj);
- return vma;
+ err = PTR_ERR(vma);
+ goto err_put;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err) {
- i915_vma_put(vma);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err_unlock;
+
+ i915_vma_lock(vma);
base = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(base)) {
- i915_gem_object_put(obj);
- return ERR_CAST(base);
+ err = PTR_ERR(base);
+ goto err_unpin;
}
cs = base;
@@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine,
*cancel = base + loop;
*counter = srm ? memset32(base + end, 0, 1) : NULL;
return vma;
+
+err_unpin:
+ i915_vma_unpin(vma);
+err_unlock:
+ i915_vma_unlock(vma);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
}
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
@@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg)
goto err_vma;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
@@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
- i915_vma_unlock(vma);
i915_request_add(rq);
if (err)
goto err_vma;
@@ -700,7 +706,7 @@ int live_rps_frequency_cs(void *arg)
f = act; /* may skip ahead [pcu granularity] */
}
- err = -EINVAL;
+ err = -EINTR; /* ignore error, continue on with test */
}
err_vma:
@@ -708,6 +714,7 @@ err_vma:
i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
+ i915_vma_unlock(vma);
i915_vma_put(vma);
st_engine_heartbeat_enable(engine);
@@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg)
goto err_vma;
}
- i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
@@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
- i915_vma_unlock(vma);
i915_request_add(rq);
if (err)
goto err_vma;
@@ -841,7 +846,7 @@ int live_rps_frequency_srm(void *arg)
f = act; /* may skip ahead [pcu granularity] */
}
- err = -EINVAL;
+ err = -EINTR; /* ignore error, continue on with test */
}
err_vma:
@@ -849,6 +854,7 @@ err_vma:
i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
+ i915_vma_unlock(vma);
i915_vma_put(vma);
st_engine_heartbeat_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index fb5b7d3498a6..19c2cb166e7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -72,7 +72,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
unsigned long cacheline;
int err;
- tl = intel_timeline_create(state->gt, NULL);
+ tl = intel_timeline_create(state->gt);
if (IS_ERR(tl))
return PTR_ERR(tl);
@@ -158,7 +158,7 @@ out:
__mock_hwsp_record(&state, na, NULL);
kfree(state.history);
err_put:
- drm_dev_put(&i915->drm);
+ mock_destroy_device(i915);
return err;
}
@@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
struct i915_request *rq;
int err;
- err = intel_timeline_pin(tl);
+ err = intel_timeline_pin(tl, NULL);
if (err) {
rq = ERR_PTR(err);
goto out;
@@ -487,11 +487,11 @@ checked_intel_timeline_create(struct intel_gt *gt)
{
struct intel_timeline *tl;
- tl = intel_timeline_create(gt, NULL);
+ tl = intel_timeline_create(gt);
if (IS_ERR(tl))
return tl;
- if (*tl->hwsp_seqno != tl->seqno) {
+ if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
*tl->hwsp_seqno, tl->seqno);
intel_timeline_put(tl);
@@ -561,9 +561,9 @@ static int live_hwsp_engine(void *arg)
for (n = 0; n < count; n++) {
struct intel_timeline *tl = timelines[n];
- if (!err && *tl->hwsp_seqno != n) {
- pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n",
- n, tl->hwsp_offset, *tl->hwsp_seqno);
+ if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
+ GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
+ n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
GEM_TRACE_DUMP();
err = -EINVAL;
}
@@ -633,9 +633,9 @@ out:
for (n = 0; n < count; n++) {
struct intel_timeline *tl = timelines[n];
- if (!err && *tl->hwsp_seqno != n) {
- pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n",
- n, tl->hwsp_offset, *tl->hwsp_seqno);
+ if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
+ GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
+ n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
GEM_TRACE_DUMP();
err = -EINVAL;
}
@@ -660,14 +660,14 @@ static int live_hwsp_wrap(void *arg)
* foreign GPU references.
*/
- tl = intel_timeline_create(gt, NULL);
+ tl = intel_timeline_create(gt);
if (IS_ERR(tl))
return PTR_ERR(tl);
if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
goto out_free;
- err = intel_timeline_pin(tl);
+ err = intel_timeline_pin(tl, NULL);
if (err)
goto out_free;
@@ -733,7 +733,8 @@ static int live_hwsp_wrap(void *arg)
goto out;
}
- if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
+ if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
+ READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
*hwsp_seqno[0], *hwsp_seqno[1],
seqno[0], seqno[1]);
@@ -966,9 +967,10 @@ static int live_hwsp_recycle(void *arg)
break;
}
- if (*tl->hwsp_seqno != count) {
- pr_err("Invalid seqno stored in timeline %lu @ tl->hwsp_offset, found 0x%x\n",
- count, *tl->hwsp_seqno);
+ if (READ_ONCE(*tl->hwsp_seqno) != count) {
+ GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
+ count, tl->fence_context,
+ tl->hwsp_offset, *tl->hwsp_seqno);
GEM_TRACE_DUMP();
err = -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index febc9e6692ba..61a0532d0f3d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
return PTR_ERR(results);
err = 0;
- i915_gem_object_lock(results);
+ i915_gem_object_lock(results, NULL);
intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
i915_gem_object_unlock(results);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 861657897c0f..942c7c187adb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
goto err;
flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_ggtt_pin(vma, 0, flags);
+ ret = i915_ggtt_pin(vma, NULL, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 59b27aba15c6..80e8b6c3bc8c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -51,8 +51,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* Note that RKL uses the same firmware as TGL.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \