From 354842df3888d63dd0371358189cafde267b4a72 Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Thu, 17 Sep 2020 20:28:42 -0400
Subject: drm/i915/dp: Tweak initial dpcd backlight.enabled value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 79946723092b ("drm/i915: Assume 100% brightness when not in
DPCD control mode"), we fixed the brightness level when DPCD control was
not active to max brightness. This is as good as we can guess since most
backlights go on full when uncontrolled.

However in doing so we changed the semantics of the initial
'backlight.enabled' value. At least on Pixelbooks, they  were relying
on the brightness level in DP_EDP_BACKLIGHT_BRIGHTNESS_MSB to be 0 on
boot such that enabled would be false. This causes the device to be
enabled when the brightness is set. Without this, brightness control
doesn't work. So by changing brightness to max, we also flipped enabled
to be true on boot.

To fix this, make enabled a function of brightness and backlight control
mechanism.

Fixes: 79946723092b ("drm/i915: Assume 100% brightness when not in DPCD control mode")
Cc: Lyude Paul <lyude@redhat.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Cc: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Kevin Chowski <chowski@chromium.org>>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Lyude Paul <lyude@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200918002845.32766-1-sean@poorly.run
(cherry picked from commit 4ade8f31c25bef7ce7ed4d7cbac17df7c4bad850)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/i915/display/intel_dp_aux_backlight.c  | 31 ++++++++++++++--------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index acbd7eb66cbe..036f504ac7db 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -52,17 +52,11 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
 	}
 }
 
-/*
- * Read the current backlight value from DPCD register(s) based
- * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported
- */
-static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
+static bool intel_dp_aux_backlight_dpcd_mode(struct intel_connector *connector)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
-	u8 read_val[2] = { 0x0 };
 	u8 mode_reg;
-	u16 level = 0;
 
 	if (drm_dp_dpcd_readb(&intel_dp->aux,
 			      DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
@@ -70,15 +64,29 @@ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
 		drm_dbg_kms(&i915->drm,
 			    "Failed to read the DPCD register 0x%x\n",
 			    DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
-		return 0;
+		return false;
 	}
 
+	return (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) ==
+	       DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
+}
+
+/*
+ * Read the current backlight value from DPCD register(s) based
+ * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported
+ */
+static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
+{
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	u8 read_val[2] = { 0x0 };
+	u16 level = 0;
+
 	/*
 	 * If we're not in DPCD control mode yet, the programmed brightness
 	 * value is meaningless and we should assume max brightness
 	 */
-	if ((mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) !=
-	    DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD)
+	if (!intel_dp_aux_backlight_dpcd_mode(connector))
 		return connector->panel.backlight.max;
 
 	if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB,
@@ -319,7 +327,8 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 
 	panel->backlight.min = 0;
 	panel->backlight.level = intel_dp_aux_get_backlight(connector);
-	panel->backlight.enabled = panel->backlight.level != 0;
+	panel->backlight.enabled = intel_dp_aux_backlight_dpcd_mode(connector) &&
+				   panel->backlight.level != 0;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 849c0fe9e831dcebea1b46e2237e13f274a8756a Mon Sep 17 00:00:00 2001
From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Date: Wed, 29 Jul 2020 15:55:39 +0530
Subject: drm/i915/gt: Initialize reserved and unspecified MOCS indices

In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

v2: As suggested by Lucas De Marchi to utilise __init_mocs_table for
programming default value, setting I915_MOCS_PTE index of tgl_mocs_table
with desired value.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Cc: Mathew Alwin <alwin.mathew@intel.com>
Cc: Mcguire Russell W <russell.w.mcguire@intel.com>
Cc: Spruit Neil R <neil.r.spruit@intel.com>
Cc: Zhou Cheng <cheng.zhou@intel.com>
Cc: Benemelis Mike G <mike.g.benemelis@intel.com>

Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20200729102539.134731-2-ayaz.siddiqui@intel.com
Cc: stable@vger.kernel.org
(cherry picked from commit 4d8a5cfe3b131f60903949f998c5961cc922e0b0)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 632e08a4592b..b8f56e62158e 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 		   L3_1_UC)
 
 static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
-	/* Base - Error (Reserved for Non-Use) */
-	MOCS_ENTRY(0, 0x0, 0x0),
-	/* Base - Reserved */
-	MOCS_ENTRY(1, 0x0, 0x0),
-
+	/*
+	 * NOTE:
+	 * Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+	 * These reserved entries should never be used, they may be changed
+	 * to low performant variants with better coherency in the future if
+	 * more entries are needed. We are programming index I915_MOCS_PTE(1)
+	 * only, __init_mocs_table() take care to program unused index with
+	 * this entry.
+	 */
+	MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
 	GEN11_MOCS_ENTRIES,
 
 	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
-- 
cgit v1.2.3


From 1664ffee760a5d98952318fdd9b198fae396d660 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Thu, 15 Oct 2020 13:21:35 +0100
Subject: drm/i915: Mark ininitial fb obj as WT on eLLC machines to avoid rcu
 lockup during fbdev init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we leave the cache_level of the initial fb obj
set to NONE. This means on eLLC machines the first pin_to_display()
will try to switch it to WT which requires a vma unbind+bind.
If that happens during the fbdev initialization rcu does not
seem operational which causes the unbind to get stuck. To
most appearances this looks like a dead machine on boot.

Avoid the unbind by already marking the object cache_level
as WT when creating it. We still do an excplicit ggtt pin
which will rewrite the PTEs anyway, so they will match whatever
cache level we set.

Cc: <stable@vger.kernel.org> # v5.7+
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2381
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20201007120329.17076-1-ville.syrjala@linux.intel.com
Link: https://patchwork.freedesktop.org/patch/msgid/20201015122138.30161-1-chris@chris-wilson.co.uk
(cherry picked from commit d46b60a2e8d246f1f0faa38e52f4f5a73858c338)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index fac4657a286c..d9494fd7c305 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3434,6 +3434,14 @@ initial_plane_vma(struct drm_i915_private *i915,
 	if (IS_ERR(obj))
 		return NULL;
 
+	/*
+	 * Mark it WT ahead of time to avoid changing the
+	 * cache_level during fbdev initialization. The
+	 * unbind there would get stuck waiting for rcu.
+	 */
+	i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ?
+					    I915_CACHE_WT : I915_CACHE_NONE);
+
 	switch (plane_config->tiling) {
 	case I915_TILING_NONE:
 		break;
-- 
cgit v1.2.3


From d5e8782129c22036425f29f9b6a254895482d7bd Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 15 Oct 2020 12:59:54 +0100
Subject: drm/i915/gem: Support parsing of oversize batches

Matthew Auld noted that on more recent systems (such as the parser for
gen9) we may have objects that are larger than expected by the GEM uAPI
(i.e. greater than u32). These objects would have incorrect implicit
batch lengths, causing the parser to reject them for being incomplete,
or worse.

Based on a patch by Matthew Auld.

Reported-by: Matthew Auld <matthew.auld@intel.com>
Fixes: 435e8fc059db ("drm/i915: Allow parsing of unsized batches")
Testcase: igt/gem_exec_params/larger-than-life-batch
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20201015115954.871-1-chris@chris-wilson.co.uk
(cherry picked from commit 57b2d834bf235daab388c3ba12d035c820ae09c6)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4b09bcd70cf4..1904e6e5ea64 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -287,8 +287,8 @@ struct i915_execbuffer {
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
 	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 
+	u64 batch_len; /** Length of batch within object */
 	u32 batch_start_offset; /** Location within object of batch */
-	u32 batch_len; /** Length of batch within object */
 	u32 batch_flags; /** Flags composed for emit_bb_start() */
 	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
 
@@ -871,6 +871,10 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	if (eb->batch_len == 0)
 		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+	if (unlikely(eb->batch_len == 0)) { /* impossible! */
+		drm_dbg(&i915->drm, "Invalid batch length\n");
+		return -EINVAL;
+	}
 
 	return 0;
 
@@ -2424,7 +2428,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 	struct drm_i915_private *i915 = eb->i915;
 	struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
 	struct i915_vma *shadow, *trampoline, *batch;
-	unsigned int len;
+	unsigned long len;
 	int err;
 
 	if (!eb_use_cmdparser(eb)) {
@@ -2449,6 +2453,8 @@ static int eb_parse(struct i915_execbuffer *eb)
 	} else {
 		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
 	}
+	if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */
+		return -EINVAL;
 
 	if (!pool) {
 		pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
-- 
cgit v1.2.3


From 9b99e5ba3e5d68039bd6b657e4bbe520a3521f4c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 15 Oct 2020 20:50:23 +0100
Subject: drm/i915/gt: Delay execlist processing for tgl

When running gem_exec_nop, it floods the system with many requests (with
the goal of userspace submitting faster than the HW can process a single
empty batch). This causes the driver to continually resubmit new
requests onto the end of an active context, a flood of lite-restore
preemptions. If we time this just right, Tigerlake hangs.

Inserting a small delay between the processing of CS events and
submitting the next context, prevents the hang. Naturally it does not
occur with debugging enabled. The suspicion then is that this is related
to the issues with the CS event buffer, and inserting an mmio read of
the CS pointer status appears to be very successful in preventing the
hang. Other registers, or uncached reads, or plain mb, do not prevent
the hang, suggesting that register is key -- but that the hang can be
prevented by a simple udelay, suggests it is just a timing issue like
that encountered by commit 233c1ae3c83f ("drm/i915/gt: Wait for CSB
entries on Tigerlake"). Also note that the hang is not prevented by
applying CTX_DESC_FORCE_RESTORE, or by inserting a delay on the GPU
between requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201015195023.32346-1-chris@chris-wilson.co.uk
(cherry picked from commit 6ca7217dffaf1abba91558e67a2efb655ac91405)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0412a44f25f2..bff237a8843a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2649,6 +2649,9 @@ static void process_csb(struct intel_engine_cs *engine)
 			smp_wmb(); /* complete the seqlock */
 			WRITE_ONCE(execlists->active, execlists->inflight);
 
+			/* XXX Magic delay for tgl */
+			ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {
 			if (GEM_WARN_ON(!*execlists->active)) {
-- 
cgit v1.2.3


From 64402570e12f7b63ab33fc4640d3709c9ce2b380 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 2 Oct 2020 09:34:25 +0100
Subject: drm/i915/gt: Undo forced context restores after trivial preemptions

We may try to preempt the currently executing request, only to find that
after unravelling all the dependencies that the original executing
context is still the earliest in the topological sort and re-submitted
back to HW (if we do detect some change in the ELSP that requires
re-submission). However, due to the way we check for wrap-around during
the unravelling, we mark any context that has been submitted just once
(i.e. with the rq->wa_tail set, but the ring->tail earlier) as
potentially wrapping and requiring a forced restore on resubmission.
This was expected to be not a problem, as it was anticipated that most
unwinding for preemption would result in a context switch and the few
that did not would be lost in the noise. It did not take long for
someone to find one particular workload where the cost of those extra
context restores was measurable.

However, since we know the wa_tail is of fixed size, and we know that a
request must be larger than the wa_tail itself, we can safely maintain
the check for request wrapping and check against a slightly future point
in the ring that includes an expected wa_tail. (That is if the
ring->tail is already set to rq->wa_tail, including another 8 bytes in
the check does not invalidate the incremental wrap detection.)

Fixes: 8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.4+
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201002083425.4605-1-chris@chris-wilson.co.uk
(cherry picked from commit bb65548e3c6e299175a9e8c3e24b2b9577656a5d)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bff237a8843a..341ff8e3af4c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1140,9 +1140,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 			/* Check in case we rollback so far we wrap [size/2] */
 			if (intel_ring_direction(rq->ring,
-						 intel_ring_wrap(rq->ring,
-								 rq->tail),
-						 rq->ring->tail) > 0)
+						 rq->tail,
+						 rq->ring->tail + 8) > 0)
 				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
 
 			active = rq;
-- 
cgit v1.2.3


From db9bc2d35f49fed248296d3216597b078c0bab37 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 16 Oct 2020 10:25:27 +0100
Subject: drm/i915: Use the active reference on the vma while capturing

During error capture, we need to take a reference to the vma from before
the reset in order to catpure the contents of the vma later. Currently
we are using both an active reference and a kref, but due to nature of
the i915_vma reference handling, that kref is on the vma->obj and not
the vma itself. This means the vma may be destroyed as soon as it is
idle, that is in between the i915_active_release(&vma->active) and the
i915_vma_put(vma):

<3> [197.866181] BUG: KASAN: use-after-free in intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<3> [197.866339] Read of size 8 at addr ffff8881258cb800 by task gem_exec_captur/1041
<3> [197.866467]
<4> [197.866512] CPU: 2 PID: 1041 Comm: gem_exec_captur Not tainted 5.9.0-g5e4234f97efba-kasan_200+ #1
<4> [197.866521] Hardware name: Intel Corp. Broxton P/Apollolake RVP1A, BIOS APLKRVPA.X64.0150.B11.1608081044 08/08/2016
<4> [197.866530] Call Trace:
<4> [197.866549]  dump_stack+0x99/0xd0
<4> [197.866760]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.866783]  print_address_description.constprop.8+0x3e/0x60
<4> [197.866797]  ? kmsg_dump_rewind_nolock+0xd4/0xd4
<4> [197.866819]  ? lockdep_hardirqs_off+0xd4/0x120
<4> [197.867037]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867249]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867270]  kasan_report.cold.10+0x1f/0x37
<4> [197.867492]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867710]  intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867949]  i915_gpu_coredump.part.29+0x150/0x7b0 [i915]
<4> [197.868186]  i915_capture_error_state+0x5e/0xc0 [i915]
<4> [197.868396]  intel_gt_handle_error+0x6eb/0xa20 [i915]
<4> [197.868624]  ? intel_gt_reset_global+0x370/0x370 [i915]
<4> [197.868644]  ? check_flags+0x50/0x50
<4> [197.868662]  ? __lock_acquire+0xd59/0x6b00
<4> [197.868678]  ? register_lock_class+0x1ad0/0x1ad0
<4> [197.868944]  i915_wedged_set+0xcf/0x1b0 [i915]
<4> [197.869147]  ? i915_wedged_get+0x90/0x90 [i915]
<4> [197.869371]  ? i915_wedged_get+0x90/0x90 [i915]
<4> [197.869398]  simple_attr_write+0x153/0x1c0
<4> [197.869428]  full_proxy_write+0xee/0x180
<4> [197.869442]  ? __sb_start_write+0x1f3/0x310
<4> [197.869465]  vfs_write+0x1a3/0x640
<4> [197.869492]  ksys_write+0xec/0x1c0
<4> [197.869507]  ? __ia32_sys_read+0xa0/0xa0
<4> [197.869525]  ? lockdep_hardirqs_on_prepare+0x32b/0x4e0
<4> [197.869541]  ? syscall_enter_from_user_mode+0x1c/0x50
<4> [197.869566]  do_syscall_64+0x33/0x80
<4> [197.869579]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
<4> [197.869590] RIP: 0033:0x7fd8b7aee281
<4> [197.869604] Code: c3 0f 1f 84 00 00 00 00 00 48 8b 05 59 8d 20 00 c3 0f 1f 84 00 00 00 00 00 8b 05 8a d1 20 00 85 c0 75 16 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53
<4> [197.869613] RSP: 002b:00007ffea3b72008 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
<4> [197.869625] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fd8b7aee281
<4> [197.869633] RDX: 0000000000000002 RSI: 00007fd8b81a82e7 RDI: 000000000000000d
<4> [197.869641] RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000034
<4> [197.869650] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fd8b81a82e7
<4> [197.869658] R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
<3> [197.869707]
<3> [197.869757] Allocated by task 1041:
<4> [197.869833]  kasan_save_stack+0x19/0x40
<4> [197.869843]  __kasan_kmalloc.constprop.5+0xc1/0xd0
<4> [197.869853]  kmem_cache_alloc+0x106/0x8e0
<4> [197.870059]  i915_vma_instance+0x212/0x1930 [i915]
<4> [197.870270]  eb_lookup_vmas+0xe06/0x1d10 [i915]
<4> [197.870475]  i915_gem_do_execbuffer+0x131d/0x4080 [i915]
<4> [197.870682]  i915_gem_execbuffer2_ioctl+0x103/0x5d0 [i915]
<4> [197.870701]  drm_ioctl_kernel+0x1d2/0x270
<4> [197.870710]  drm_ioctl+0x40d/0x85c
<4> [197.870721]  __x64_sys_ioctl+0x10d/0x170
<4> [197.870731]  do_syscall_64+0x33/0x80
<4> [197.870740]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
<3> [197.870748]
<3> [197.870798] Freed by task 22:
<4> [197.870865]  kasan_save_stack+0x19/0x40
<4> [197.870875]  kasan_set_track+0x1c/0x30
<4> [197.870884]  kasan_set_free_info+0x1b/0x30
<4> [197.870894]  __kasan_slab_free+0x111/0x160
<4> [197.870903]  kmem_cache_free+0xcd/0x710
<4> [197.871109]  i915_vma_parked+0x618/0x800 [i915]
<4> [197.871307]  __gt_park+0xdb/0x1e0 [i915]
<4> [197.871501]  ____intel_wakeref_put_last+0xb1/0x190 [i915]
<4> [197.871516]  process_one_work+0x8dc/0x15d0
<4> [197.871525]  worker_thread+0x82/0xb30
<4> [197.871535]  kthread+0x36d/0x440
<4> [197.871545]  ret_from_fork+0x22/0x30
<3> [197.871553]
<3> [197.871602] The buggy address belongs to the object at ffff8881258cb740
 which belongs to the cache i915_vma of size 968

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2553
Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.5+
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201016092527.29039-1-chris@chris-wilson.co.uk
(cherry picked from commit 178536b8292ecd118f59d2fac4509c7e70b99854)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a635ec8d0b94..cf6e47adfde6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1312,7 +1312,7 @@ capture_vma(struct intel_engine_capture_vma *next,
 	}
 
 	strcpy(c->name, name);
-	c->vma = i915_vma_get(vma);
+	c->vma = vma; /* reference held while active */
 
 	c->next = next;
 	return c;
@@ -1402,7 +1402,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 						 compress));
 
 		i915_active_release(&vma->active);
-		i915_vma_put(vma);
 
 		capture = this->next;
 		kfree(this);
-- 
cgit v1.2.3


From ca05277e40216979d9976613322e64db23a850e0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 15 Sep 2020 14:49:20 +0100
Subject: drm/i915/gt: Widen CSB pointer to u64 for the parsers

A CSB entry is 64b, and it is simpler for us to treat it as an array of
64b entries than as an array of pairs of 32b entries.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-1-chris@chris-wilson.co.uk
(cherry picked from commit f24a44e52fbc9881fc5f3bcef536831a15a439f3)
(cherry picked from commit 3d4dbe0e0f0d04ebcea917b7279586817da8cf46)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 33 ++++++++++++++--------------
 2 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c400aaa2287b..ee6312601c56 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -278,7 +278,7 @@ struct intel_engine_execlists {
 	 *
 	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	u32 *csb_status;
+	u64 *csb_status;
 
 	/**
 	 * @csb_size: context status buffer FIFO size
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 341ff8e3af4c..1a7bbbb16356 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2463,7 +2463,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 }
 
 static inline void
-invalidate_csb_entries(const u32 *first, const u32 *last)
+invalidate_csb_entries(const u64 *first, const u64 *last)
 {
 	clflush((void *)first);
 	clflush((void *)last);
@@ -2495,14 +2495,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool
-gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u32 lower_dw = csb[0];
-	u32 upper_dw = csb[1];
-	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
-	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	u64 entry = READ_ONCE(*csb);
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	bool new_queue =
+		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2512,7 +2510,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!ctx_to_valid);
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
 		return true;
 	}
 
@@ -2521,12 +2519,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
 	return false;
 }
 
-static inline bool
-gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen8_csb_parse(const u64 *csb)
 {
 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
@@ -2534,7 +2531,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u32 * const buf = execlists->csb_status;
+	const u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2615,12 +2612,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 */
 
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head, buf[2 * head + 0], buf[2 * head + 1]);
+			     head,
+			     upper_32_bits(buf[head]),
+			     lower_32_bits(buf[head]));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(execlists, buf + 2 * head);
+			promote = gen12_csb_parse(buf + head);
 		else
-			promote = gen8_csb_parse(execlists, buf + 2 * head);
+			promote = gen8_csb_parse(buf + head);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
@@ -5159,7 +5158,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	}
 
 	execlists->csb_status =
-		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
+		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
-- 
cgit v1.2.3


From 4a9bb58aba6db4eba2a8b3aa1edc415c94a669a8 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 15 Sep 2020 14:49:21 +0100
Subject: drm/i915/gt: Wait for CSB entries on Tigerlake

On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
we see the GPU report more context-switch entries for us to parse, but
those entries have not been written, leading us to process stale events,
and eventually report a hung GPU.

However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that may be, we can set a reliable
upper bound for how long we should wait for:

    513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
References: HSDES#22011327657, HSDES#1508287568
Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org # v5.4
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-2-chris@chris-wilson.co.uk
(cherry picked from commit 233c1ae3c83f21046c6c4083da904163ece8f110)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1a7bbbb16356..a32aabce7901 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2497,9 +2497,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  */
 static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u64 entry = READ_ONCE(*csb);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	bool new_queue =
+	bool ctx_away_valid;
+	bool new_queue;
+	u64 entry;
+
+	/* HSD#22011248461 */
+	entry = READ_ONCE(*csb);
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+	WRITE_ONCE(*(u64 *)csb, -1);
+
+	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	new_queue =
 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
@@ -4006,6 +4019,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/* Check that the GPU does indeed update the CSB entries! */
+	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 
-- 
cgit v1.2.3


From b8cff311a42df4f15d6432583573d828b5c7b12a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Oct 2020 09:34:44 +0100
Subject: drm/i915/gt: Onion unwind for scratch page allocation failure

In switching to using objects for our ppGTT scratch pages, care was not
taken to avoid trying to unref NULL objects on failure. And for gen6
ppGTT, it appears we forgot entirely to unwind after a partial allocation
failure.

Fixes: 89351925a477 ("drm/i915/gt: Switch to object allocations for page directories")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019083444.1286-1-chris@chris-wilson.co.uk
(cherry picked from commit fa812ce96a46efc27cae4dcad866aaee9cb25d28)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 18 ++++++++++++------
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c |  3 ++-
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index fd0d24d28763..c30adc05fa98 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -239,18 +239,24 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
 			       I915_CACHE_NONE, PTE_READ_ONLY);
 
 	vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
-	if (IS_ERR(vm->scratch[1]))
-		return PTR_ERR(vm->scratch[1]);
+	if (IS_ERR(vm->scratch[1])) {
+		ret = PTR_ERR(vm->scratch[1]);
+		goto err_scratch0;
+	}
 
 	ret = pin_pt_dma(vm, vm->scratch[1]);
-	if (ret) {
-		i915_gem_object_put(vm->scratch[1]);
-		return ret;
-	}
+	if (ret)
+		goto err_scratch1;
 
 	fill32_px(vm->scratch[1], vm->scratch[0]->encode);
 
 	return 0;
+
+err_scratch1:
+	i915_gem_object_put(vm->scratch[1]);
+err_scratch0:
+	i915_gem_object_put(vm->scratch[0]);
+	return ret;
 }
 
 static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index eb64f474a78c..38c7069b7749 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -604,7 +604,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	return 0;
 
 free_scratch:
-	free_scratch(vm);
+	while (i--)
+		i915_gem_object_put(vm->scratch[i]);
 	return -ENOMEM;
 }
 
-- 
cgit v1.2.3


From 3da3c5c1c9825c24168f27b021339e90af37e969 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Oct 2020 17:50:05 +0100
Subject: drm/i915: Exclude low pages (128KiB) of stolen from use

The GPU is trashing the low pages of its reserved memory upon reset. If
we are using this memory for ringbuffers, then we will dutiful resubmit
the trashed rings after the reset causing further resets, and worse. We
must exclude this range from our own use. The value of 128KiB was found
by empirical measurement (and verified now with a selftest) on gen9.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019165005.18128-2-chris@chris-wilson.co.uk
(cherry picked from commit d3606757e611fbd48bb239e8c2fe9779b3f50035)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug         |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   6 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h |   2 +
 drivers/gpu/drm/i915/gt/selftest_reset.c   | 196 +++++++++++++++++++++++++++++
 4 files changed, 203 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 1cb28c20807c..25cd9788a4d5 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -153,6 +153,7 @@ config DRM_I915_SELFTEST
 	select DRM_EXPORT_FOR_TESTS if m
 	select FAULT_INJECTION
 	select PRIME_NUMBERS
+	select CRC32
 	help
 	  Choose this option to allow the driver to perform selftests upon
 	  loading; also requires the i915.selftest=1 module parameter. To
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0be5e8683337..84b2707d8b17 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
 				struct drm_mm_node *node, u64 size,
 				unsigned alignment)
 {
-	return i915_gem_stolen_insert_node_in_range(i915, node, size,
-						    alignment, 0, U64_MAX);
+	return i915_gem_stolen_insert_node_in_range(i915, node,
+						    size, alignment,
+						    I915_GEM_STOLEN_BIAS,
+						    U64_MAX);
 }
 
 void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
index e15c0adad8af..61e028063f9f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 					       resource_size_t stolen_offset,
 					       resource_size_t size);
 
+#define I915_GEM_STOLEN_BIAS SZ_128K
+
 #endif /* __I915_GEM_STOLEN_H__ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 35406ecdf0b2..ef5aeebbeeb0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -3,9 +3,203 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include <linux/crc32.h>
+
+#include "gem/i915_gem_stolen.h"
+
+#include "i915_memcpy.h"
 #include "i915_selftest.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_atomic.h"
+#include "selftests/igt_spinner.h"
+
+static int
+__igt_reset_stolen(struct intel_gt *gt,
+		   intel_engine_mask_t mask,
+		   const char *msg)
+{
+	struct i915_ggtt *ggtt = &gt->i915->ggtt;
+	const struct resource *dsm = &gt->i915->dsm;
+	resource_size_t num_pages, page;
+	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	long max, count;
+	void *tmp;
+	u32 *crc;
+	int err;
+
+	if (!drm_mm_node_allocated(&ggtt->error_capture))
+		return 0;
+
+	num_pages = resource_size(dsm) >> PAGE_SHIFT;
+	if (!num_pages)
+		return 0;
+
+	crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
+	if (!crc)
+		return -ENOMEM;
+
+	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto err_crc;
+	}
+
+	igt_global_reset_lock(gt);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	err = igt_spinner_init(&spin, gt);
+	if (err)
+		goto err_lock;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		struct i915_request *rq;
+
+		if (!(mask & engine->mask))
+			continue;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto err_spin;
+		}
+		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+		intel_context_put(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_spin;
+		}
+		i915_request_add(rq);
+	}
+
+	for (page = 0; page < num_pages; page++) {
+		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
+		void __iomem *s;
+		void *in;
+
+		ggtt->vm.insert_page(&ggtt->vm, dma,
+				     ggtt->error_capture.start,
+				     I915_CACHE_NONE, 0);
+		mb();
+
+		s = io_mapping_map_wc(&ggtt->iomap,
+				      ggtt->error_capture.start,
+				      PAGE_SIZE);
+
+		if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
+					     page << PAGE_SHIFT,
+					     ((page + 1) << PAGE_SHIFT) - 1))
+			memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+		in = s;
+		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
+			in = tmp;
+		crc[page] = crc32_le(0, in, PAGE_SIZE);
+
+		io_mapping_unmap(s);
+	}
+	mb();
+	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
+
+	if (mask == ALL_ENGINES) {
+		intel_gt_reset(gt, mask, NULL);
+	} else {
+		for_each_engine(engine, gt, id) {
+			if (mask & engine->mask)
+				intel_engine_reset(engine, NULL);
+		}
+	}
+
+	max = -1;
+	count = 0;
+	for (page = 0; page < num_pages; page++) {
+		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
+		void __iomem *s;
+		void *in;
+		u32 x;
+
+		ggtt->vm.insert_page(&ggtt->vm, dma,
+				     ggtt->error_capture.start,
+				     I915_CACHE_NONE, 0);
+		mb();
+
+		s = io_mapping_map_wc(&ggtt->iomap,
+				      ggtt->error_capture.start,
+				      PAGE_SIZE);
+
+		in = s;
+		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
+			in = tmp;
+		x = crc32_le(0, in, PAGE_SIZE);
+
+		if (x != crc[page] &&
+		    !__drm_mm_interval_first(&gt->i915->mm.stolen,
+					     page << PAGE_SHIFT,
+					     ((page + 1) << PAGE_SHIFT) - 1)) {
+			pr_debug("unused stolen page %pa modified by GPU reset\n",
+				 &page);
+			if (count++ == 0)
+				igt_hexdump(in, PAGE_SIZE);
+			max = page;
+		}
+
+		io_mapping_unmap(s);
+	}
+	mb();
+	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
+
+	if (count > 0) {
+		pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
+			msg, count, max);
+	}
+	if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
+		pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
+		       msg, I915_GEM_STOLEN_BIAS);
+		err = -EINVAL;
+	}
+
+err_spin:
+	igt_spinner_fini(&spin);
+
+err_lock:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	igt_global_reset_unlock(gt);
+
+	kfree(tmp);
+err_crc:
+	kfree(crc);
+	return err;
+}
+
+static int igt_reset_device_stolen(void *arg)
+{
+	return __igt_reset_stolen(arg, ALL_ENGINES, "device");
+}
+
+static int igt_reset_engines_stolen(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		err = __igt_reset_stolen(gt, engine->mask, engine->name);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
 
 static int igt_global_reset(void *arg)
 {
@@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
+		SUBTEST(igt_reset_device_stolen),
+		SUBTEST(igt_reset_engines_stolen),
 		SUBTEST(igt_wedged_reset),
 		SUBTEST(igt_atomic_reset),
 		SUBTEST(igt_atomic_engine_reset),
-- 
cgit v1.2.3


From 8195400f7ea95399f721ad21f4d663a62c65036f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Oct 2020 11:15:23 +0100
Subject: drm/i915: Force VT'd workarounds when running as a guest OS

If i915.ko is being used as a passthrough device, it does not know if
the host is using intel_iommu. Mixing the iommu and gfx causes a few
issues (such as scanout overfetch) which we need to workaround inside
the driver, so if we detect we are running under a hypervisor, also
assume the device access is being virtualised.

Reported-by: Stefan Fritsch <sf@sfritsch.de>
Suggested-by: Stefan Fritsch <sf@sfritsch.de>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Stefan Fritsch <sf@sfritsch.de>
Cc: stable@vger.kernel.org
Tested-by: Stefan Fritsch <sf@sfritsch.de>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019101523.4145-1-chris@chris-wilson.co.uk
(cherry picked from commit f566fdcd6cc49a9d5b5d782f56e3e7cb243f01b8)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eef9a821c49c..8426d5974669 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -33,6 +33,8 @@
 #include <uapi/drm/i915_drm.h>
 #include <uapi/drm/drm_fourcc.h>
 
+#include <asm/hypervisor.h>
+
 #include <linux/io-mapping.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -1760,7 +1762,9 @@ static inline bool intel_vtd_active(void)
 	if (intel_iommu_gfx_mapped)
 		return true;
 #endif
-	return false;
+
+	/* Running as a guest, we assume the host is enforcing VT'd */
+	return !hypervisor_is_type(X86_HYPER_NATIVE);
 }
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
-- 
cgit v1.2.3


From 5c6c13cd1102caf92d006a3cf4591c0229019daf Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 11 Aug 2020 10:25:32 +0100
Subject: drm/i915: Drop runtime-pm assert from vgpu io accessors

The "mmio" writes into vgpu registers are simple memory traps from the
guest into the host. We do not need to assert in the guest that the
device is awake for the io as we do not write to the device itself.

However, over time we have refactored all the mmio accessors with the
result that the vgpu reuses the gen2 accessors and so inherits the
assert for runtime-pm of the native device. The assert though has
actually been there since commit 3be0bf5acca6 ("drm/i915: Create vGPU
specific MMIO operations to reduce traps").

References: 3be0bf5acca6 ("drm/i915: Create vGPU specific MMIO operations to reduce traps")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Yan Zhao <yan.y.zhao@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20200811092532.13753-1-chris@chris-wilson.co.uk
(cherry picked from commit 0e65ce24a33c1d37da4bf43c34e080334ec6cb60)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 263ffcb832b7..97ded2a59cf4 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1209,6 +1209,18 @@ unclaimed_reg_debug(struct intel_uncore *uncore,
 		spin_unlock(&uncore->debug->lock);
 }
 
+#define __vgpu_read(x) \
+static u##x \
+vgpu_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \
+	u##x val = __raw_uncore_read##x(uncore, reg); \
+	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
+	return val; \
+}
+__vgpu_read(8)
+__vgpu_read(16)
+__vgpu_read(32)
+__vgpu_read(64)
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(uncore->rpm);
@@ -1414,6 +1426,16 @@ __gen_reg_write_funcs(gen8);
 #undef GEN6_WRITE_FOOTER
 #undef GEN6_WRITE_HEADER
 
+#define __vgpu_write(x) \
+static void \
+vgpu_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \
+	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
+	__raw_uncore_write##x(uncore, reg, val); \
+}
+__vgpu_write(8)
+__vgpu_write(16)
+__vgpu_write(32)
+
 #define ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, x) \
 do { \
 	(uncore)->funcs.mmio_writeb = x##_write8; \
@@ -1735,7 +1757,10 @@ static void uncore_raw_init(struct intel_uncore *uncore)
 {
 	GEM_BUG_ON(intel_uncore_has_forcewake(uncore));
 
-	if (IS_GEN(uncore->i915, 5)) {
+	if (intel_vgpu_active(uncore->i915)) {
+		ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, vgpu);
+		ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, vgpu);
+	} else if (IS_GEN(uncore->i915, 5)) {
 		ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen5);
 		ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen5);
 	} else {
-- 
cgit v1.2.3