From 354842df3888d63dd0371358189cafde267b4a72 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Thu, 17 Sep 2020 20:28:42 -0400 Subject: drm/i915/dp: Tweak initial dpcd backlight.enabled value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 79946723092b ("drm/i915: Assume 100% brightness when not in DPCD control mode"), we fixed the brightness level when DPCD control was not active to max brightness. This is as good as we can guess since most backlights go on full when uncontrolled. However in doing so we changed the semantics of the initial 'backlight.enabled' value. At least on Pixelbooks, they were relying on the brightness level in DP_EDP_BACKLIGHT_BRIGHTNESS_MSB to be 0 on boot such that enabled would be false. This causes the device to be enabled when the brightness is set. Without this, brightness control doesn't work. So by changing brightness to max, we also flipped enabled to be true on boot. To fix this, make enabled a function of brightness and backlight control mechanism. Fixes: 79946723092b ("drm/i915: Assume 100% brightness when not in DPCD control mode") Cc: Lyude Paul Cc: Jani Nikula Cc: Juha-Pekka Heikkila Cc: "Ville Syrjälä" Cc: Rodrigo Vivi Cc: Kevin Chowski > Signed-off-by: Sean Paul Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20200918002845.32766-1-sean@poorly.run (cherry picked from commit 4ade8f31c25bef7ce7ed4d7cbac17df7c4bad850) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/display/intel_dp_aux_backlight.c | 31 ++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index acbd7eb66cbe..036f504ac7db 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -52,17 +52,11 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) } } -/* - * Read the current backlight value from DPCD register(s) based - * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported - */ -static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) +static bool intel_dp_aux_backlight_dpcd_mode(struct intel_connector *connector) { struct intel_dp *intel_dp = intel_attached_dp(connector); struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 read_val[2] = { 0x0 }; u8 mode_reg; - u16 level = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, @@ -70,15 +64,29 @@ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) drm_dbg_kms(&i915->drm, "Failed to read the DPCD register 0x%x\n", DP_EDP_BACKLIGHT_MODE_SET_REGISTER); - return 0; + return false; } + return (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) == + DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; +} + +/* + * Read the current backlight value from DPCD register(s) based + * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported + */ +static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u8 read_val[2] = { 0x0 }; + u16 level = 0; + /* * If we're not in DPCD control mode yet, the programmed brightness * value is meaningless and we should assume max brightness */ - if ((mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) != - DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) + if (!intel_dp_aux_backlight_dpcd_mode(connector)) return connector->panel.backlight.max; if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, @@ -319,7 +327,8 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, panel->backlight.min = 0; panel->backlight.level = intel_dp_aux_get_backlight(connector); - panel->backlight.enabled = panel->backlight.level != 0; + panel->backlight.enabled = intel_dp_aux_backlight_dpcd_mode(connector) && + panel->backlight.level != 0; return 0; } -- cgit v1.2.3 From 849c0fe9e831dcebea1b46e2237e13f274a8756a Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Wed, 29 Jul 2020 15:55:39 +0530 Subject: drm/i915/gt: Initialize reserved and unspecified MOCS indices In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. v2: As suggested by Lucas De Marchi to utilise __init_mocs_table for programming default value, setting I915_MOCS_PTE index of tgl_mocs_table with desired value. Cc: Chris Wilson Cc: Lucas De Marchi Cc: Tomasz Lis Cc: Matt Roper Cc: Joonas Lahtinen Cc: Francisco Jerez Cc: Mathew Alwin Cc: Mcguire Russell W Cc: Spruit Neil R Cc: Zhou Cheng Cc: Benemelis Mike G Signed-off-by: Ayaz A Siddiqui Reviewed-by: Lucas De Marchi Acked-by: Joonas Lahtinen Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200729102539.134731-2-ayaz.siddiqui@intel.com Cc: stable@vger.kernel.org (cherry picked from commit 4d8a5cfe3b131f60903949f998c5961cc922e0b0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_mocs.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 632e08a4592b..b8f56e62158e 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_1_UC) static const struct drm_i915_mocs_entry tgl_mocs_table[] = { - /* Base - Error (Reserved for Non-Use) */ - MOCS_ENTRY(0, 0x0, 0x0), - /* Base - Reserved */ - MOCS_ENTRY(1, 0x0, 0x0), - + /* + * NOTE: + * Reserved and unspecified MOCS indices have been set to (L3 + LCC). + * These reserved entries should never be used, they may be changed + * to low performant variants with better coherency in the future if + * more entries are needed. We are programming index I915_MOCS_PTE(1) + * only, __init_mocs_table() take care to program unused index with + * this entry. + */ + MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), GEN11_MOCS_ENTRIES, /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ -- cgit v1.2.3 From 1664ffee760a5d98952318fdd9b198fae396d660 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 15 Oct 2020 13:21:35 +0100 Subject: drm/i915: Mark ininitial fb obj as WT on eLLC machines to avoid rcu lockup during fbdev init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we leave the cache_level of the initial fb obj set to NONE. This means on eLLC machines the first pin_to_display() will try to switch it to WT which requires a vma unbind+bind. If that happens during the fbdev initialization rcu does not seem operational which causes the unbind to get stuck. To most appearances this looks like a dead machine on boot. Avoid the unbind by already marking the object cache_level as WT when creating it. We still do an excplicit ggtt pin which will rewrite the PTEs anyway, so they will match whatever cache level we set. Cc: # v5.7+ Suggested-by: Chris Wilson Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2381 Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201007120329.17076-1-ville.syrjala@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20201015122138.30161-1-chris@chris-wilson.co.uk (cherry picked from commit d46b60a2e8d246f1f0faa38e52f4f5a73858c338) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index fac4657a286c..d9494fd7c305 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3434,6 +3434,14 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ERR(obj)) return NULL; + /* + * Mark it WT ahead of time to avoid changing the + * cache_level during fbdev initialization. The + * unbind there would get stuck waiting for rcu. + */ + i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ? + I915_CACHE_WT : I915_CACHE_NONE); + switch (plane_config->tiling) { case I915_TILING_NONE: break; -- cgit v1.2.3 From d5e8782129c22036425f29f9b6a254895482d7bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Oct 2020 12:59:54 +0100 Subject: drm/i915/gem: Support parsing of oversize batches Matthew Auld noted that on more recent systems (such as the parser for gen9) we may have objects that are larger than expected by the GEM uAPI (i.e. greater than u32). These objects would have incorrect implicit batch lengths, causing the parser to reject them for being incomplete, or worse. Based on a patch by Matthew Auld. Reported-by: Matthew Auld Fixes: 435e8fc059db ("drm/i915: Allow parsing of unsized batches") Testcase: igt/gem_exec_params/larger-than-life-batch Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Cc: Jon Bloomfield Reviewed-by: Matthew Auld Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20201015115954.871-1-chris@chris-wilson.co.uk (cherry picked from commit 57b2d834bf235daab388c3ba12d035c820ae09c6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4b09bcd70cf4..1904e6e5ea64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -287,8 +287,8 @@ struct i915_execbuffer { u64 invalid_flags; /** Set of execobj.flags that are invalid */ u32 context_flags; /** Set of execobj.flags to insert from the ctx */ + u64 batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ - u32 batch_len; /** Length of batch within object */ u32 batch_flags; /** Flags composed for emit_bb_start() */ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ @@ -871,6 +871,10 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) if (eb->batch_len == 0) eb->batch_len = eb->batch->vma->size - eb->batch_start_offset; + if (unlikely(eb->batch_len == 0)) { /* impossible! */ + drm_dbg(&i915->drm, "Invalid batch length\n"); + return -EINVAL; + } return 0; @@ -2424,7 +2428,7 @@ static int eb_parse(struct i915_execbuffer *eb) struct drm_i915_private *i915 = eb->i915; struct intel_gt_buffer_pool_node *pool = eb->batch_pool; struct i915_vma *shadow, *trampoline, *batch; - unsigned int len; + unsigned long len; int err; if (!eb_use_cmdparser(eb)) { @@ -2449,6 +2453,8 @@ static int eb_parse(struct i915_execbuffer *eb) } else { len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } + if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */ + return -EINVAL; if (!pool) { pool = intel_gt_get_buffer_pool(eb->engine->gt, len); -- cgit v1.2.3 From 9b99e5ba3e5d68039bd6b657e4bbe520a3521f4c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Oct 2020 20:50:23 +0100 Subject: drm/i915/gt: Delay execlist processing for tgl When running gem_exec_nop, it floods the system with many requests (with the goal of userspace submitting faster than the HW can process a single empty batch). This causes the driver to continually resubmit new requests onto the end of an active context, a flood of lite-restore preemptions. If we time this just right, Tigerlake hangs. Inserting a small delay between the processing of CS events and submitting the next context, prevents the hang. Naturally it does not occur with debugging enabled. The suspicion then is that this is related to the issues with the CS event buffer, and inserting an mmio read of the CS pointer status appears to be very successful in preventing the hang. Other registers, or uncached reads, or plain mb, do not prevent the hang, suggesting that register is key -- but that the hang can be prevented by a simple udelay, suggests it is just a timing issue like that encountered by commit 233c1ae3c83f ("drm/i915/gt: Wait for CSB entries on Tigerlake"). Also note that the hang is not prevented by applying CTX_DESC_FORCE_RESTORE, or by inserting a delay on the GPU between requests. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Bruce Chang Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201015195023.32346-1-chris@chris-wilson.co.uk (cherry picked from commit 6ca7217dffaf1abba91558e67a2efb655ac91405) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0412a44f25f2..bff237a8843a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2649,6 +2649,9 @@ static void process_csb(struct intel_engine_cs *engine) smp_wmb(); /* complete the seqlock */ WRITE_ONCE(execlists->active, execlists->inflight); + /* XXX Magic delay for tgl */ + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + WRITE_ONCE(execlists->pending[0], NULL); } else { if (GEM_WARN_ON(!*execlists->active)) { -- cgit v1.2.3 From 64402570e12f7b63ab33fc4640d3709c9ce2b380 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Oct 2020 09:34:25 +0100 Subject: drm/i915/gt: Undo forced context restores after trivial preemptions We may try to preempt the currently executing request, only to find that after unravelling all the dependencies that the original executing context is still the earliest in the topological sort and re-submitted back to HW (if we do detect some change in the ELSP that requires re-submission). However, due to the way we check for wrap-around during the unravelling, we mark any context that has been submitted just once (i.e. with the rq->wa_tail set, but the ring->tail earlier) as potentially wrapping and requiring a forced restore on resubmission. This was expected to be not a problem, as it was anticipated that most unwinding for preemption would result in a context switch and the few that did not would be lost in the noise. It did not take long for someone to find one particular workload where the cost of those extra context restores was measurable. However, since we know the wa_tail is of fixed size, and we know that a request must be larger than the wa_tail itself, we can safely maintain the check for request wrapping and check against a slightly future point in the ring that includes an expected wa_tail. (That is if the ring->tail is already set to rq->wa_tail, including another 8 bytes in the check does not invalidate the incremental wrap detection.) Fixes: 8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Bruce Chang Cc: Ramalingam C Cc: Joonas Lahtinen Cc: # v5.4+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201002083425.4605-1-chris@chris-wilson.co.uk (cherry picked from commit bb65548e3c6e299175a9e8c3e24b2b9577656a5d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bff237a8843a..341ff8e3af4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1140,9 +1140,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) /* Check in case we rollback so far we wrap [size/2] */ if (intel_ring_direction(rq->ring, - intel_ring_wrap(rq->ring, - rq->tail), - rq->ring->tail) > 0) + rq->tail, + rq->ring->tail + 8) > 0) rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; active = rq; -- cgit v1.2.3 From db9bc2d35f49fed248296d3216597b078c0bab37 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Oct 2020 10:25:27 +0100 Subject: drm/i915: Use the active reference on the vma while capturing During error capture, we need to take a reference to the vma from before the reset in order to catpure the contents of the vma later. Currently we are using both an active reference and a kref, but due to nature of the i915_vma reference handling, that kref is on the vma->obj and not the vma itself. This means the vma may be destroyed as soon as it is idle, that is in between the i915_active_release(&vma->active) and the i915_vma_put(vma): <3> [197.866181] BUG: KASAN: use-after-free in intel_engine_coredump_add_vma+0x36c/0x4a0 [i915] <3> [197.866339] Read of size 8 at addr ffff8881258cb800 by task gem_exec_captur/1041 <3> [197.866467] <4> [197.866512] CPU: 2 PID: 1041 Comm: gem_exec_captur Not tainted 5.9.0-g5e4234f97efba-kasan_200+ #1 <4> [197.866521] Hardware name: Intel Corp. Broxton P/Apollolake RVP1A, BIOS APLKRVPA.X64.0150.B11.1608081044 08/08/2016 <4> [197.866530] Call Trace: <4> [197.866549] dump_stack+0x99/0xd0 <4> [197.866760] ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915] <4> [197.866783] print_address_description.constprop.8+0x3e/0x60 <4> [197.866797] ? kmsg_dump_rewind_nolock+0xd4/0xd4 <4> [197.866819] ? lockdep_hardirqs_off+0xd4/0x120 <4> [197.867037] ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915] <4> [197.867249] ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915] <4> [197.867270] kasan_report.cold.10+0x1f/0x37 <4> [197.867492] ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915] <4> [197.867710] intel_engine_coredump_add_vma+0x36c/0x4a0 [i915] <4> [197.867949] i915_gpu_coredump.part.29+0x150/0x7b0 [i915] <4> [197.868186] i915_capture_error_state+0x5e/0xc0 [i915] <4> [197.868396] intel_gt_handle_error+0x6eb/0xa20 [i915] <4> [197.868624] ? intel_gt_reset_global+0x370/0x370 [i915] <4> [197.868644] ? check_flags+0x50/0x50 <4> [197.868662] ? __lock_acquire+0xd59/0x6b00 <4> [197.868678] ? register_lock_class+0x1ad0/0x1ad0 <4> [197.868944] i915_wedged_set+0xcf/0x1b0 [i915] <4> [197.869147] ? i915_wedged_get+0x90/0x90 [i915] <4> [197.869371] ? i915_wedged_get+0x90/0x90 [i915] <4> [197.869398] simple_attr_write+0x153/0x1c0 <4> [197.869428] full_proxy_write+0xee/0x180 <4> [197.869442] ? __sb_start_write+0x1f3/0x310 <4> [197.869465] vfs_write+0x1a3/0x640 <4> [197.869492] ksys_write+0xec/0x1c0 <4> [197.869507] ? __ia32_sys_read+0xa0/0xa0 <4> [197.869525] ? lockdep_hardirqs_on_prepare+0x32b/0x4e0 <4> [197.869541] ? syscall_enter_from_user_mode+0x1c/0x50 <4> [197.869566] do_syscall_64+0x33/0x80 <4> [197.869579] entry_SYSCALL_64_after_hwframe+0x44/0xa9 <4> [197.869590] RIP: 0033:0x7fd8b7aee281 <4> [197.869604] Code: c3 0f 1f 84 00 00 00 00 00 48 8b 05 59 8d 20 00 c3 0f 1f 84 00 00 00 00 00 8b 05 8a d1 20 00 85 c0 75 16 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53 <4> [197.869613] RSP: 002b:00007ffea3b72008 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 <4> [197.869625] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fd8b7aee281 <4> [197.869633] RDX: 0000000000000002 RSI: 00007fd8b81a82e7 RDI: 000000000000000d <4> [197.869641] RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000034 <4> [197.869650] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fd8b81a82e7 <4> [197.869658] R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000 <3> [197.869707] <3> [197.869757] Allocated by task 1041: <4> [197.869833] kasan_save_stack+0x19/0x40 <4> [197.869843] __kasan_kmalloc.constprop.5+0xc1/0xd0 <4> [197.869853] kmem_cache_alloc+0x106/0x8e0 <4> [197.870059] i915_vma_instance+0x212/0x1930 [i915] <4> [197.870270] eb_lookup_vmas+0xe06/0x1d10 [i915] <4> [197.870475] i915_gem_do_execbuffer+0x131d/0x4080 [i915] <4> [197.870682] i915_gem_execbuffer2_ioctl+0x103/0x5d0 [i915] <4> [197.870701] drm_ioctl_kernel+0x1d2/0x270 <4> [197.870710] drm_ioctl+0x40d/0x85c <4> [197.870721] __x64_sys_ioctl+0x10d/0x170 <4> [197.870731] do_syscall_64+0x33/0x80 <4> [197.870740] entry_SYSCALL_64_after_hwframe+0x44/0xa9 <3> [197.870748] <3> [197.870798] Freed by task 22: <4> [197.870865] kasan_save_stack+0x19/0x40 <4> [197.870875] kasan_set_track+0x1c/0x30 <4> [197.870884] kasan_set_free_info+0x1b/0x30 <4> [197.870894] __kasan_slab_free+0x111/0x160 <4> [197.870903] kmem_cache_free+0xcd/0x710 <4> [197.871109] i915_vma_parked+0x618/0x800 [i915] <4> [197.871307] __gt_park+0xdb/0x1e0 [i915] <4> [197.871501] ____intel_wakeref_put_last+0xb1/0x190 [i915] <4> [197.871516] process_one_work+0x8dc/0x15d0 <4> [197.871525] worker_thread+0x82/0xb30 <4> [197.871535] kthread+0x36d/0x440 <4> [197.871545] ret_from_fork+0x22/0x30 <3> [197.871553] <3> [197.871602] The buggy address belongs to the object at ffff8881258cb740 which belongs to the cache i915_vma of size 968 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2553 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: # v5.5+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20201016092527.29039-1-chris@chris-wilson.co.uk (cherry picked from commit 178536b8292ecd118f59d2fac4509c7e70b99854) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gpu_error.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a635ec8d0b94..cf6e47adfde6 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1312,7 +1312,7 @@ capture_vma(struct intel_engine_capture_vma *next, } strcpy(c->name, name); - c->vma = i915_vma_get(vma); + c->vma = vma; /* reference held while active */ c->next = next; return c; @@ -1402,7 +1402,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, compress)); i915_active_release(&vma->active); - i915_vma_put(vma); capture = this->next; kfree(this); -- cgit v1.2.3 From ca05277e40216979d9976613322e64db23a850e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Sep 2020 14:49:20 +0100 Subject: drm/i915/gt: Widen CSB pointer to u64 for the parsers A CSB entry is 64b, and it is simpler for us to treat it as an array of 64b entries than as an array of pairs of 32b entries. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-1-chris@chris-wilson.co.uk (cherry picked from commit f24a44e52fbc9881fc5f3bcef536831a15a439f3) (cherry picked from commit 3d4dbe0e0f0d04ebcea917b7279586817da8cf46) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 33 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c400aaa2287b..ee6312601c56 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -278,7 +278,7 @@ struct intel_engine_execlists { * * Note these register may be either mmio or HWSP shadow. */ - u32 *csb_status; + u64 *csb_status; /** * @csb_size: context status buffer FIFO size diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 341ff8e3af4c..1a7bbbb16356 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2463,7 +2463,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) } static inline void -invalidate_csb_entries(const u32 *first, const u32 *last) +invalidate_csb_entries(const u64 *first, const u64 *last) { clflush((void *)first); clflush((void *)last); @@ -2495,14 +2495,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last) * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline bool -gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +static inline bool gen12_csb_parse(const u64 *csb) { - u32 lower_dw = csb[0]; - u32 upper_dw = csb[1]; - bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); - bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + u64 entry = READ_ONCE(*csb); + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry)); + bool new_queue = + lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; /* * The context switch detail is not guaranteed to be 5 when a preemption @@ -2512,7 +2510,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * would require some extra handling, but we don't support that. */ if (!ctx_away_valid || new_queue) { - GEM_BUG_ON(!ctx_to_valid); + GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry))); return true; } @@ -2521,12 +2519,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * context switch on an unsuccessful wait instruction since we always * use polling mode. */ - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry))); return false; } -static inline bool -gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +static inline bool gen8_csb_parse(const u64 *csb) { return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } @@ -2534,7 +2531,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - const u32 * const buf = execlists->csb_status; + const u64 * const buf = execlists->csb_status; const u8 num_entries = execlists->csb_size; u8 head, tail; @@ -2615,12 +2612,14 @@ static void process_csb(struct intel_engine_cs *engine) */ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", - head, buf[2 * head + 0], buf[2 * head + 1]); + head, + upper_32_bits(buf[head]), + lower_32_bits(buf[head])); if (INTEL_GEN(engine->i915) >= 12) - promote = gen12_csb_parse(execlists, buf + 2 * head); + promote = gen12_csb_parse(buf + head); else - promote = gen8_csb_parse(execlists, buf + 2 * head); + promote = gen8_csb_parse(buf + head); if (promote) { struct i915_request * const *old = execlists->active; @@ -5159,7 +5158,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) } execlists->csb_status = - &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; + (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = &engine->status_page.addr[intel_hws_csb_write_index(i915)]; -- cgit v1.2.3 From 4a9bb58aba6db4eba2a8b3aa1edc415c94a669a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Sep 2020 14:49:21 +0100 Subject: drm/i915/gt: Wait for CSB entries on Tigerlake On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries") where, presumably, due to a missing Global Observation Point synchronisation, the write pointer of the CSB ringbuffer is updated _prior_ to the contents of the ringbuffer. That is we see the GPU report more context-switch entries for us to parse, but those entries have not been written, leading us to process stale events, and eventually report a hung GPU. However, this effect appears to be much more severe than we previously saw on Icelake (though it might be best if we try the same approach there as well and measure), and Bruce suggested the good idea of resetting the CSB entry after use so that we can detect when it has been updated by the GPU. By instrumenting how long that may be, we can set a reliable upper bound for how long we should wait for: 513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045 References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries") References: HSDES#22011327657, HSDES#1508287568 Suggested-by: Bruce Chang Signed-off-by: Chris Wilson Cc: Bruce Chang Cc: Mika Kuoppala Cc: stable@vger.kernel.org # v5.4 Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-2-chris@chris-wilson.co.uk (cherry picked from commit 233c1ae3c83f21046c6c4083da904163ece8f110) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 1a7bbbb16356..a32aabce7901 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2497,9 +2497,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last) */ static inline bool gen12_csb_parse(const u64 *csb) { - u64 entry = READ_ONCE(*csb); - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry)); - bool new_queue = + bool ctx_away_valid; + bool new_queue; + u64 entry; + + /* HSD#22011248461 */ + entry = READ_ONCE(*csb); + if (unlikely(entry == -1)) { + preempt_disable(); + if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)) + GEM_WARN_ON("50us CSB timeout"); + preempt_enable(); + } + WRITE_ONCE(*(u64 *)csb, -1); + + ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry)); + new_queue = lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; /* @@ -4006,6 +4019,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ + /* Check that the GPU does indeed update the CSB entries! */ + memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); invalidate_csb_entries(&execlists->csb_status[0], &execlists->csb_status[reset_value]); -- cgit v1.2.3 From b8cff311a42df4f15d6432583573d828b5c7b12a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Oct 2020 09:34:44 +0100 Subject: drm/i915/gt: Onion unwind for scratch page allocation failure In switching to using objects for our ppGTT scratch pages, care was not taken to avoid trying to unref NULL objects on failure. And for gen6 ppGTT, it appears we forgot entirely to unwind after a partial allocation failure. Fixes: 89351925a477 ("drm/i915/gt: Switch to object allocations for page directories") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20201019083444.1286-1-chris@chris-wilson.co.uk (cherry picked from commit fa812ce96a46efc27cae4dcad866aaee9cb25d28) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 18 ++++++++++++------ drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 3 ++- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index fd0d24d28763..c30adc05fa98 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -239,18 +239,24 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) I915_CACHE_NONE, PTE_READ_ONLY); vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); - if (IS_ERR(vm->scratch[1])) - return PTR_ERR(vm->scratch[1]); + if (IS_ERR(vm->scratch[1])) { + ret = PTR_ERR(vm->scratch[1]); + goto err_scratch0; + } ret = pin_pt_dma(vm, vm->scratch[1]); - if (ret) { - i915_gem_object_put(vm->scratch[1]); - return ret; - } + if (ret) + goto err_scratch1; fill32_px(vm->scratch[1], vm->scratch[0]->encode); return 0; + +err_scratch1: + i915_gem_object_put(vm->scratch[1]); +err_scratch0: + i915_gem_object_put(vm->scratch[0]); + return ret; } static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index eb64f474a78c..38c7069b7749 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -604,7 +604,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) return 0; free_scratch: - free_scratch(vm); + while (i--) + i915_gem_object_put(vm->scratch[i]); return -ENOMEM; } -- cgit v1.2.3 From 3da3c5c1c9825c24168f27b021339e90af37e969 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Oct 2020 17:50:05 +0100 Subject: drm/i915: Exclude low pages (128KiB) of stolen from use The GPU is trashing the low pages of its reserved memory upon reset. If we are using this memory for ringbuffers, then we will dutiful resubmit the trashed rings after the reset causing further resets, and worse. We must exclude this range from our own use. The value of 128KiB was found by empirical measurement (and verified now with a selftest) on gen9. Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201019165005.18128-2-chris@chris-wilson.co.uk (cherry picked from commit d3606757e611fbd48bb239e8c2fe9779b3f50035) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/Kconfig.debug | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 6 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 2 + drivers/gpu/drm/i915/gt/selftest_reset.c | 196 +++++++++++++++++++++++++++++ 4 files changed, 203 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 1cb28c20807c..25cd9788a4d5 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -153,6 +153,7 @@ config DRM_I915_SELFTEST select DRM_EXPORT_FOR_TESTS if m select FAULT_INJECTION select PRIME_NUMBERS + select CRC32 help Choose this option to allow the driver to perform selftests upon loading; also requires the i915.selftest=1 module parameter. To diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 0be5e8683337..84b2707d8b17 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment) { - return i915_gem_stolen_insert_node_in_range(i915, node, size, - alignment, 0, U64_MAX); + return i915_gem_stolen_insert_node_in_range(i915, node, + size, alignment, + I915_GEM_STOLEN_BIAS, + U64_MAX); } void i915_gem_stolen_remove_node(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index e15c0adad8af..61e028063f9f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv resource_size_t stolen_offset, resource_size_t size); +#define I915_GEM_STOLEN_BIAS SZ_128K + #endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 35406ecdf0b2..ef5aeebbeeb0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -3,9 +3,203 @@ * Copyright © 2018 Intel Corporation */ +#include + +#include "gem/i915_gem_stolen.h" + +#include "i915_memcpy.h" #include "i915_selftest.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_spinner.h" + +static int +__igt_reset_stolen(struct intel_gt *gt, + intel_engine_mask_t mask, + const char *msg) +{ + struct i915_ggtt *ggtt = >->i915->ggtt; + const struct resource *dsm = >->i915->dsm; + resource_size_t num_pages, page; + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + enum intel_engine_id id; + struct igt_spinner spin; + long max, count; + void *tmp; + u32 *crc; + int err; + + if (!drm_mm_node_allocated(&ggtt->error_capture)) + return 0; + + num_pages = resource_size(dsm) >> PAGE_SHIFT; + if (!num_pages) + return 0; + + crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL); + if (!crc) + return -ENOMEM; + + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto err_crc; + } + + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + err = igt_spinner_init(&spin, gt); + if (err) + goto err_lock; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + + if (!(mask & engine->mask)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_spin; + } + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_spin; + } + i915_request_add(rq); + } + + for (page = 0; page < num_pages; page++) { + dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT); + void __iomem *s; + void *in; + + ggtt->vm.insert_page(&ggtt->vm, dma, + ggtt->error_capture.start, + I915_CACHE_NONE, 0); + mb(); + + s = io_mapping_map_wc(&ggtt->iomap, + ggtt->error_capture.start, + PAGE_SIZE); + + if (!__drm_mm_interval_first(>->i915->mm.stolen, + page << PAGE_SHIFT, + ((page + 1) << PAGE_SHIFT) - 1)) + memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + in = s; + if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) + in = tmp; + crc[page] = crc32_le(0, in, PAGE_SIZE); + + io_mapping_unmap(s); + } + mb(); + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE); + + if (mask == ALL_ENGINES) { + intel_gt_reset(gt, mask, NULL); + } else { + for_each_engine(engine, gt, id) { + if (mask & engine->mask) + intel_engine_reset(engine, NULL); + } + } + + max = -1; + count = 0; + for (page = 0; page < num_pages; page++) { + dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT); + void __iomem *s; + void *in; + u32 x; + + ggtt->vm.insert_page(&ggtt->vm, dma, + ggtt->error_capture.start, + I915_CACHE_NONE, 0); + mb(); + + s = io_mapping_map_wc(&ggtt->iomap, + ggtt->error_capture.start, + PAGE_SIZE); + + in = s; + if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) + in = tmp; + x = crc32_le(0, in, PAGE_SIZE); + + if (x != crc[page] && + !__drm_mm_interval_first(>->i915->mm.stolen, + page << PAGE_SHIFT, + ((page + 1) << PAGE_SHIFT) - 1)) { + pr_debug("unused stolen page %pa modified by GPU reset\n", + &page); + if (count++ == 0) + igt_hexdump(in, PAGE_SIZE); + max = page; + } + + io_mapping_unmap(s); + } + mb(); + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE); + + if (count > 0) { + pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n", + msg, count, max); + } + if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) { + pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n", + msg, I915_GEM_STOLEN_BIAS); + err = -EINVAL; + } + +err_spin: + igt_spinner_fini(&spin); + +err_lock: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); + + kfree(tmp); +err_crc: + kfree(crc); + return err; +} + +static int igt_reset_device_stolen(void *arg) +{ + return __igt_reset_stolen(arg, ALL_ENGINES, "device"); +} + +static int igt_reset_engines_stolen(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + if (!intel_has_reset_engine(gt)) + return 0; + + for_each_engine(engine, gt, id) { + err = __igt_reset_stolen(gt, engine->mask, engine->name); + if (err) + return err; + } + + return 0; +} static int igt_global_reset(void *arg) { @@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_global_reset), /* attempt to recover GPU first */ + SUBTEST(igt_reset_device_stolen), + SUBTEST(igt_reset_engines_stolen), SUBTEST(igt_wedged_reset), SUBTEST(igt_atomic_reset), SUBTEST(igt_atomic_engine_reset), -- cgit v1.2.3 From 8195400f7ea95399f721ad21f4d663a62c65036f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Oct 2020 11:15:23 +0100 Subject: drm/i915: Force VT'd workarounds when running as a guest OS If i915.ko is being used as a passthrough device, it does not know if the host is using intel_iommu. Mixing the iommu and gfx causes a few issues (such as scanout overfetch) which we need to workaround inside the driver, so if we detect we are running under a hypervisor, also assume the device access is being virtualised. Reported-by: Stefan Fritsch Suggested-by: Stefan Fritsch Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Cc: Stefan Fritsch Cc: stable@vger.kernel.org Tested-by: Stefan Fritsch Reviewed-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20201019101523.4145-1-chris@chris-wilson.co.uk (cherry picked from commit f566fdcd6cc49a9d5b5d782f56e3e7cb243f01b8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eef9a821c49c..8426d5974669 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -1760,7 +1762,9 @@ static inline bool intel_vtd_active(void) if (intel_iommu_gfx_mapped) return true; #endif - return false; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return !hypervisor_is_type(X86_HYPER_NATIVE); } static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From 5c6c13cd1102caf92d006a3cf4591c0229019daf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Aug 2020 10:25:32 +0100 Subject: drm/i915: Drop runtime-pm assert from vgpu io accessors The "mmio" writes into vgpu registers are simple memory traps from the guest into the host. We do not need to assert in the guest that the device is awake for the io as we do not write to the device itself. However, over time we have refactored all the mmio accessors with the result that the vgpu reuses the gen2 accessors and so inherits the assert for runtime-pm of the native device. The assert though has actually been there since commit 3be0bf5acca6 ("drm/i915: Create vGPU specific MMIO operations to reduce traps"). References: 3be0bf5acca6 ("drm/i915: Create vGPU specific MMIO operations to reduce traps") Signed-off-by: Chris Wilson Cc: Yan Zhao Cc: Zhenyu Wang Reviewed-by: Zhenyu Wang Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200811092532.13753-1-chris@chris-wilson.co.uk (cherry picked from commit 0e65ce24a33c1d37da4bf43c34e080334ec6cb60) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_uncore.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 263ffcb832b7..97ded2a59cf4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1209,6 +1209,18 @@ unclaimed_reg_debug(struct intel_uncore *uncore, spin_unlock(&uncore->debug->lock); } +#define __vgpu_read(x) \ +static u##x \ +vgpu_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \ + u##x val = __raw_uncore_read##x(uncore, reg); \ + trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ + return val; \ +} +__vgpu_read(8) +__vgpu_read(16) +__vgpu_read(32) +__vgpu_read(64) + #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(uncore->rpm); @@ -1414,6 +1426,16 @@ __gen_reg_write_funcs(gen8); #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER +#define __vgpu_write(x) \ +static void \ +vgpu_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \ + trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ + __raw_uncore_write##x(uncore, reg, val); \ +} +__vgpu_write(8) +__vgpu_write(16) +__vgpu_write(32) + #define ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, x) \ do { \ (uncore)->funcs.mmio_writeb = x##_write8; \ @@ -1735,7 +1757,10 @@ static void uncore_raw_init(struct intel_uncore *uncore) { GEM_BUG_ON(intel_uncore_has_forcewake(uncore)); - if (IS_GEN(uncore->i915, 5)) { + if (intel_vgpu_active(uncore->i915)) { + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, vgpu); + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, vgpu); + } else if (IS_GEN(uncore->i915, 5)) { ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen5); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen5); } else { -- cgit v1.2.3