From b680c37a4d145cf4d8f2b24e46b1163e5ceb1d35 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 19 Sep 2014 18:27:27 +0200 Subject: drm/i915: DocBook integration for frontbuffer tracking I shouldn't ask everyone to do this and fail myself ... This extracts all the frontbuffer tracking functions into intel_frontbuffer.c, adds a DOC overview section and also adds the missing kerneldoc for i915_gem_track_fb and also pulls it into the same section for convenience. v2: Don't forget about the header files. v3: Oops, might check compilation next time around. To make my life easier drop the increase_pllclock from set_base_atomic since really, it doesn't matter if you see your Oops or kgdb with a tiny bit of lag. v4: Try to better explain how to actually use this, requested by Paulo on irc. v5: Explain invalidate/flush a bit clearer. v6: s/business/busyness/ Acked-by: Paulo Zanoni Cc: Paulo Zanoni Cc: Vandana Kannan Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_frontbuffer.c | 277 +++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 drivers/gpu/drm/i915/intel_frontbuffer.c (limited to 'drivers/gpu/drm/i915/intel_frontbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c new file mode 100644 index 000000000000..f74744c091cb --- /dev/null +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -0,0 +1,277 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Daniel Vetter + */ + +/** + * DOC: frontbuffer tracking + * + * Many features require us to track changes to the currently active + * frontbuffer, especially rendering targetted at the frontbuffer. + * + * To be able to do so GEM tracks frontbuffers using a bitmask for all possible + * frontbuffer slots through i915_gem_track_fb(). The function in this file are + * then called when the contents of the frontbuffer are invalidated, when + * frontbuffer rendering has stopped again to flush out all the changes and when + * the frontbuffer is exchanged with a flip. Subsystems interested in + * frontbuffer changes (e.g. PSR, FBC, DRRS) should directly put their callbacks + * into the relevant places and filter for the frontbuffer slots that they are + * interested int. + * + * On a high level there are two types of powersaving features. The first one + * work like a special cache (FBC and PSR) and are interested when they should + * stop caching and when to restart caching. This is done by placing callbacks + * into the invalidate and the flush functions: At invalidate the caching must + * be stopped and at flush time it can be restarted. And maybe they need to know + * when the frontbuffer changes (e.g. when the hw doesn't initiate an invalidate + * and flush on its own) which can be achieved with placing callbacks into the + * flip functions. + * + * The other type of display power saving feature only cares about busyness + * (e.g. DRRS). In that case all three (invalidate, flush and flip) indicate + * busyness. There is no direct way to detect idleness. Instead an idle timer + * work delayed work should be started from the flush and flip functions and + * cancelled as soon as busyness is detected. + * + * Note that there's also an older frontbuffer activity tracking scheme which + * just trackings general activity. This is done by the various mark_busy and + * mark_idle functions. For display power management features using these + * functions is deprecated and should be avoided. + */ + +#include + +#include "intel_drv.h" +#include "i915_drv.h" + +static void intel_increase_pllclock(struct drm_device *dev, + enum pipe pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int dpll_reg = DPLL(pipe); + int dpll; + + if (!HAS_GMCH_DISPLAY(dev)) + return; + + if (!dev_priv->lvds_downclock_avail) + return; + + dpll = I915_READ(dpll_reg); + if (!HAS_PIPE_CXSR(dev) && (dpll & DISPLAY_RATE_SELECT_FPA1)) { + DRM_DEBUG_DRIVER("upclocking LVDS\n"); + + assert_panel_unlocked(dev_priv, pipe); + + dpll &= ~DISPLAY_RATE_SELECT_FPA1; + I915_WRITE(dpll_reg, dpll); + intel_wait_for_vblank(dev, pipe); + + dpll = I915_READ(dpll_reg); + if (dpll & DISPLAY_RATE_SELECT_FPA1) + DRM_DEBUG_DRIVER("failed to upclock LVDS!\n"); + } +} + +/** + * intel_mark_fb_busy - mark given planes as busy + * @dev: DRM device + * @frontbuffer_bits: bits for the affected planes + * @ring: optional ring for asynchronous commands + * + * This function gets called every time the screen contents change. It can be + * used to keep e.g. the update rate at the nominal refresh rate with DRRS. + */ +static void intel_mark_fb_busy(struct drm_device *dev, + unsigned frontbuffer_bits, + struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + enum pipe pipe; + + if (!i915.powersave) + return; + + for_each_pipe(dev_priv, pipe) { + if (!(frontbuffer_bits & INTEL_FRONTBUFFER_ALL_MASK(pipe))) + continue; + + intel_increase_pllclock(dev, pipe); + if (ring && intel_fbc_enabled(dev)) + ring->fbc_dirty = true; + } +} + +/** + * intel_fb_obj_invalidate - invalidate frontbuffer object + * @obj: GEM object to invalidate + * @ring: set for asynchronous rendering + * + * This function gets called every time rendering on the given object starts and + * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must + * be invalidated. If @ring is non-NULL any subsequent invalidation will be delayed + * until the rendering completes or a flip on this frontbuffer plane is + * scheduled. + */ +void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + struct intel_engine_cs *ring) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + if (!obj->frontbuffer_bits) + return; + + if (ring) { + mutex_lock(&dev_priv->fb_tracking.lock); + dev_priv->fb_tracking.busy_bits + |= obj->frontbuffer_bits; + dev_priv->fb_tracking.flip_bits + &= ~obj->frontbuffer_bits; + mutex_unlock(&dev_priv->fb_tracking.lock); + } + + intel_mark_fb_busy(dev, obj->frontbuffer_bits, ring); + + intel_edp_psr_invalidate(dev, obj->frontbuffer_bits); +} + +/** + * intel_frontbuffer_flush - flush frontbuffer + * @dev: DRM device + * @frontbuffer_bits: frontbuffer plane tracking bits + * + * This function gets called every time rendering on the given planes has + * completed and frontbuffer caching can be started again. Flushes will get + * delayed if they're blocked by some oustanding asynchronous rendering. + * + * Can be called without any locks held. + */ +void intel_frontbuffer_flush(struct drm_device *dev, + unsigned frontbuffer_bits) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Delay flushing when rings are still busy.*/ + mutex_lock(&dev_priv->fb_tracking.lock); + frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; + mutex_unlock(&dev_priv->fb_tracking.lock); + + intel_mark_fb_busy(dev, frontbuffer_bits, NULL); + + intel_edp_psr_flush(dev, frontbuffer_bits); + + /* + * FIXME: Unconditional fbc flushing here is a rather gross hack and + * needs to be reworked into a proper frontbuffer tracking scheme like + * psr employs. + */ + if (IS_BROADWELL(dev)) + gen8_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); +} + +/** + * intel_fb_obj_flush - flush frontbuffer object + * @obj: GEM object to flush + * @retire: set when retiring asynchronous rendering + * + * This function gets called every time rendering on the given object has + * completed and frontbuffer caching can be started again. If @retire is true + * then any delayed flushes will be unblocked. + */ +void intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned frontbuffer_bits; + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + if (!obj->frontbuffer_bits) + return; + + frontbuffer_bits = obj->frontbuffer_bits; + + if (retire) { + mutex_lock(&dev_priv->fb_tracking.lock); + /* Filter out new bits since rendering started. */ + frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; + + dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; + mutex_unlock(&dev_priv->fb_tracking.lock); + } + + intel_frontbuffer_flush(dev, frontbuffer_bits); +} + +/** + * intel_frontbuffer_flip_prepare - prepare asnychronous frontbuffer flip + * @dev: DRM device + * @frontbuffer_bits: frontbuffer plane tracking bits + * + * This function gets called after scheduling a flip on @obj. The actual + * frontbuffer flushing will be delayed until completion is signalled with + * intel_frontbuffer_flip_complete. If an invalidate happens in between this + * flush will be cancelled. + * + * Can be called without any locks held. + */ +void intel_frontbuffer_flip_prepare(struct drm_device *dev, + unsigned frontbuffer_bits) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + mutex_lock(&dev_priv->fb_tracking.lock); + dev_priv->fb_tracking.flip_bits + |= frontbuffer_bits; + mutex_unlock(&dev_priv->fb_tracking.lock); +} + +/** + * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flush + * @dev: DRM device + * @frontbuffer_bits: frontbuffer plane tracking bits + * + * This function gets called after the flip has been latched and will complete + * on the next vblank. It will execute the fush if it hasn't been cancalled yet. + * + * Can be called without any locks held. + */ +void intel_frontbuffer_flip_complete(struct drm_device *dev, + unsigned frontbuffer_bits) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + mutex_lock(&dev_priv->fb_tracking.lock); + /* Mask any cancelled flips. */ + frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; + dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; + mutex_unlock(&dev_priv->fb_tracking.lock); + + intel_frontbuffer_flush(dev, frontbuffer_bits); +} + -- cgit v1.2.3 From 1d73c2a8f218be3e8b6aa884740fc67110660b54 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 24 Sep 2014 19:50:59 -0400 Subject: drm/i915: Minimize the huge amount of unecessary fbc sw cache clean. The sw cache clean on BDW is a tempoorary workaround because we cannot set cache clean on blt ring with risk of hungs. So we are doing the cache clean on sw. However we are doing much more than needed. Not only when using blt ring. So, with this extra w/a we minimize the ammount of cache cleans and call it only on same cases that it was being called on gen7. The traditional FBC Cache clean happens over LRI on BLT ring when there is a frontbuffer touch happening. frontbuffer tracking set fbc_dirty variable to let BLT flush that it must clean FBC cache. fbc.need_sw_cache_clean works in the opposite information direction of ring->fbc_dirty telling software on frontbuffer tracking to perform the cache clean on sw side. v2: Clean it a little bit and fully check for Broadwell instead of gen8. v3: Rebase after frontbuffer organization. v4: Wiggle confused me. So fixing v3! Cc: Daniel Vetter Cc: Paulo Zanoni Reviewed-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++++++- drivers/gpu/drm/i915/intel_frontbuffer.c | 6 ++++-- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++++++-- 4 files changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_frontbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 999bd57cab65..cccb7767e837 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -666,6 +666,14 @@ struct i915_fbc { * possible. */ bool enabled; + /* On gen8 some rings cannont perform fbc clean operation so for now + * we are doing this on SW with mmio. + * This variable works in the opposite information direction + * of ring->fbc_dirty telling software on frontbuffer tracking + * to perform the cache clean on sw side. + */ + bool need_sw_cache_clean; + struct intel_fbc_work { struct delayed_work work; struct drm_crtc *crtc; @@ -2825,7 +2833,7 @@ extern void intel_modeset_setup_hw_state(struct drm_device *dev, extern void i915_redisable_vga(struct drm_device *dev); extern void i915_redisable_vga_power_on(struct drm_device *dev); extern bool intel_fbc_enabled(struct drm_device *dev); -extern void gen8_fbc_sw_flush(struct drm_device *dev, u32 value); +extern void bdw_fbc_sw_flush(struct drm_device *dev, u32 value); extern void intel_disable_fbc(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); extern void intel_init_pch_refclk(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index f74744c091cb..7eb74a62117f 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -189,8 +189,10 @@ void intel_frontbuffer_flush(struct drm_device *dev, * needs to be reworked into a proper frontbuffer tracking scheme like * psr employs. */ - if (IS_BROADWELL(dev)) - gen8_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); + if (dev_priv->fbc.need_sw_cache_clean) { + dev_priv->fbc.need_sw_cache_clean = false; + bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); + } } /** diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6b416201240f..011892d5356e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -356,7 +356,7 @@ bool intel_fbc_enabled(struct drm_device *dev) return dev_priv->fbc.enabled; } -void gen8_fbc_sw_flush(struct drm_device *dev, u32 value) +void bdw_fbc_sw_flush(struct drm_device *dev, u32 value) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 922d6bc1a1b3..620a89dc868b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2237,6 +2237,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, u32 invalidate, u32 flush) { struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; uint32_t cmd; int ret; @@ -2267,8 +2268,12 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, } intel_ring_advance(ring); - if (IS_GEN7(dev) && !invalidate && flush) - return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); + if (!invalidate && flush) { + if (IS_GEN7(dev)) + return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); + else if (IS_BROADWELL(dev)) + dev_priv->fbc.need_sw_cache_clean = true; + } return 0; } -- cgit v1.2.3 From 11c9b6c628c646894e6ef53f92cfd33a814ee553 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 30 Sep 2014 22:10:52 +0200 Subject: drm/i915: Tighting frontbuffer tracking around flips So I think I've spotted a small gap in the frontbuffer tracking while discussing the logic with Paulo on irc: 1. Userspace schedules gpu rendering to the current frontbuffer. This gets tracked in dev_priv->fb_tracking.busy_bits. 2. We pageflip a fully rendered buffer before the frontbuffer rendering completes. 3. The request retiring will never clear busy_bits (since at retire time the old frontbuffer won't have obj->frontbuffer_bits set), so these bits now are stuck until someone again does a bit of frontbuffer tracking. If we clear stale busy_bits in flip_prepare this gap is closed. Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_frontbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_frontbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 7eb74a62117f..c5a312d218f7 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -248,8 +248,9 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; mutex_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.flip_bits - |= frontbuffer_bits; + dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; + /* Remove stale busy bits due to the old buffer. */ + dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; mutex_unlock(&dev_priv->fb_tracking.lock); } -- cgit v1.2.3 From 5c323b2a78fade2df08a51497fc943a6fd1b868b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 30 Sep 2014 22:10:53 +0200 Subject: drm/i915: spelling fixes for frontbuffer tracking kerneldoc Oh well. v2: Fix one more spelling fail Paulo spotted. Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_frontbuffer.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_frontbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 072e69f4080e..f14f456e08f9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -819,7 +819,7 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, void intel_frontbuffer_flush(struct drm_device *dev, unsigned frontbuffer_bits); /** - * intel_frontbuffer_flip - prepare frontbuffer flip + * intel_frontbuffer_flip - synchronous frontbuffer flip * @dev: DRM device * @frontbuffer_bits: frontbuffer plane tracking bits * diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index c5a312d218f7..58cf2e6b78f4 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -28,7 +28,7 @@ * DOC: frontbuffer tracking * * Many features require us to track changes to the currently active - * frontbuffer, especially rendering targetted at the frontbuffer. + * frontbuffer, especially rendering targeted at the frontbuffer. * * To be able to do so GEM tracks frontbuffers using a bitmask for all possible * frontbuffer slots through i915_gem_track_fb(). The function in this file are @@ -55,7 +55,7 @@ * cancelled as soon as busyness is detected. * * Note that there's also an older frontbuffer activity tracking scheme which - * just trackings general activity. This is done by the various mark_busy and + * just tracks general activity. This is done by the various mark_busy and * mark_idle functions. For display power management features using these * functions is deprecated and should be avoided. */ @@ -166,7 +166,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, * * This function gets called every time rendering on the given planes has * completed and frontbuffer caching can be started again. Flushes will get - * delayed if they're blocked by some oustanding asynchronous rendering. + * delayed if they're blocked by some outstanding asynchronous rendering. * * Can be called without any locks held. */ @@ -231,7 +231,7 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj, } /** - * intel_frontbuffer_flip_prepare - prepare asnychronous frontbuffer flip + * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip * @dev: DRM device * @frontbuffer_bits: frontbuffer plane tracking bits * @@ -255,12 +255,12 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev, } /** - * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flush + * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip * @dev: DRM device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete - * on the next vblank. It will execute the fush if it hasn't been cancalled yet. + * on the next vblank. It will execute the flush if it hasn't been cancelled yet. * * Can be called without any locks held. */ @@ -277,4 +277,3 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, intel_frontbuffer_flush(dev, frontbuffer_bits); } - -- cgit v1.2.3