diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-03-20 10:04:49 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-03-20 14:55:58 +0000 |
commit | ce80075470f6328e487389262c95af092d421ffc (patch) | |
tree | 202950c4ee08754c07f63b93e9964708c7ea4d10 /drivers/gpu/drm/i915/i915_irq.c | |
parent | ca98317b89428e6ac17be0938b467ed78654dd56 (diff) | |
download | linux-ce80075470f6328e487389262c95af092d421ffc.tar.bz2 |
drm/i915: Add control flags to i915_handle_error()
Not all callers want the GPU error to handled in the same way, so expose
a control parameter. In the first instance, some callers do not want the
heavyweight error capture so add a bit to request the state to be
captured and saved.
v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
reason for the reset in the dev_notice(), superseding the earlier option
to not print that notice.
v3: Stash the reason inside the i915->gpu_error to handover to the direct
reset from the blocking waiter.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180320100449.1360-2-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 55 |
1 files changed, 32 insertions, 23 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 44eef355e12c..fa7310766217 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2877,15 +2877,10 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -/** - * i915_reset_device - do process context error handling work - * @dev_priv: i915 device private - * - * Fire an error uevent so userspace can see that a hang or error - * was detected. - */ -static void i915_reset_device(struct drm_i915_private *dev_priv) +static void i915_reset_device(struct drm_i915_private *dev_priv, + const char *msg) { + struct i915_gpu_error *error = &dev_priv->gpu_error; struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; @@ -2901,29 +2896,32 @@ static void i915_reset_device(struct drm_i915_private *dev_priv) i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { intel_prepare_reset(dev_priv); + error->reason = msg; + /* Signal that locked waiters should reset the GPU */ - set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.wait_queue); + set_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_all(&error->wait_queue); /* Wait for anyone holding the lock to wakeup, without * blocking indefinitely on struct_mutex. */ do { if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv, 0); + i915_reset(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); } - } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, + } while (wait_on_bit_timeout(&error->flags, I915_RESET_HANDOFF, TASK_UNINTERRUPTIBLE, 1)); + error->reason = NULL; + intel_finish_reset(dev_priv); } - if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } static void i915_clear_error_registers(struct drm_i915_private *dev_priv) @@ -2955,6 +2953,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) * i915_handle_error - handle a gpu error * @dev_priv: i915 device private * @engine_mask: mask representing engines that are hung + * @flags: control flags * @fmt: Error message format string * * Do some basic checking of register state at error time and @@ -2965,16 +2964,23 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) */ void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...) { struct intel_engine_cs *engine; unsigned int tmp; - va_list args; char error_msg[80]; + char *msg = NULL; - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } /* * In most cases it's guaranteed that we get here with an RPM @@ -2986,8 +2992,11 @@ void i915_handle_error(struct drm_i915_private *dev_priv, intel_runtime_pm_get(dev_priv); engine_mask &= INTEL_INFO(dev_priv)->ring_mask; - i915_capture_error_state(dev_priv, engine_mask, error_msg); - i915_clear_error_registers(dev_priv); + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(dev_priv, engine_mask, msg); + i915_clear_error_registers(dev_priv); + } /* * Try engine reset when available. We fall back to full reset if @@ -3000,7 +3009,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, &dev_priv->gpu_error.flags)) continue; - if (i915_reset_engine(engine, 0) == 0) + if (i915_reset_engine(engine, msg) == 0) engine_mask &= ~intel_engine_flag(engine); clear_bit(I915_RESET_ENGINE + engine->id, @@ -3030,7 +3039,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, TASK_UNINTERRUPTIBLE); } - i915_reset_device(dev_priv); + i915_reset_device(dev_priv, msg); for_each_engine(engine, dev_priv, tmp) { clear_bit(I915_RESET_ENGINE + engine->id, |