From 856974a401bacf4fee1f3e3b2f601b7abfd5fa2a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 2 Jul 2015 16:05:28 +0300 Subject: drm/i915/hotplug: document the hotplug handling in the driver Add an overview of the drm/i915 hotplug handling. Signed-off-by: Jani Nikula Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation/DocBook') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index c0312cbd023d..e82205ee3d5f 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4044,6 +4044,11 @@ int num_ioctls; probing, so those sections fully apply. + + Hotplug +!Pdrivers/gpu/drm/i915/intel_hotplug.c Hotplug +!Idrivers/gpu/drm/i915/intel_hotplug.c + High Definition Audio !Pdrivers/gpu/drm/i915/intel_audio.c High Definition Audio over HDMI and Display Port -- cgit v1.2.3 From cc2e26a7c57924b31e9c5ac7b3d0d814253c9285 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 8 Jul 2015 18:08:39 -0300 Subject: drm/i915: fix intel_frontbuffer_flip documentation Reported by the kbuild test robot. Regression introduced by: commit fdbff9282c0f5f61ffc87d57461b04d943250910 Author: Daniel Vetter Date: Thu Jun 18 11:23:24 2015 +0200 drm/i915: Clear fb_tracking.busy_bits also for synchronous flips (I reviewed this commit, so it's also my fault) Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 1 - drivers/gpu/drm/i915/intel_frontbuffer.c | 1 - 2 files changed, 2 deletions(-) (limited to 'Documentation/DocBook') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index e82205ee3d5f..1ca1171b16e5 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4012,7 +4012,6 @@ int num_ioctls; Frontbuffer Tracking !Pdrivers/gpu/drm/i915/intel_frontbuffer.c frontbuffer tracking !Idrivers/gpu/drm/i915/intel_frontbuffer.c -!Fdrivers/gpu/drm/i915/intel_drv.h intel_frontbuffer_flip !Fdrivers/gpu/drm/i915/i915_gem.c i915_gem_track_fb diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index c405c2deb8bf..777b1d3ccd41 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -229,7 +229,6 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, * * Can be called without any locks held. */ - void intel_frontbuffer_flip(struct drm_device *dev, unsigned frontbuffer_bits) { -- cgit v1.2.3 From a794f62a300194dbc53165f10cc5ad236a6a43c2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 24 Jul 2015 17:40:12 +0200 Subject: drm/i915: kerneldoc for fences v2: Clarify that this is about fence _registers_. Also clarify that the fence code revokes cpu ptes and not gtt ptes. Both suggested by Chris. Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 5 +++ drivers/gpu/drm/i915/i915_gem_fence.c | 75 +++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) (limited to 'Documentation/DocBook') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 1ca1171b16e5..ee2ed7ed27d1 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4197,6 +4197,11 @@ int num_ioctls; Global GTT views !Pdrivers/gpu/drm/i915/i915_gem_gtt.c Global GTT views !Idrivers/gpu/drm/i915/i915_gem_gtt.c + + + Global GTT Fence Handling +!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling +!Idrivers/gpu/drm/i915/i915_gem_fence.c Buffer Object Eviction diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index d3284ee04794..0434c42d8c11 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -25,6 +25,36 @@ #include #include "i915_drv.h" +/** + * DOC: fence register handling + * + * Important to avoid confusions: "fences" in the i915 driver are not execution + * fences used to track command completion but hardware detiler objects which + * wrap a given range of the global GTT. Each platform has only a fairly limited + * set of these objects. + * + * Fences are used to detile GTT memory mappings. They're also connected to the + * hardware frontbuffer render tracking and hence interract with frontbuffer + * conmpression. Furthermore on older platforms fences are required for tiled + * objects used by the display engine. They can also be used by the render + * engine - they're required for blitter commands and are optional for render + * commands. But on gen4+ both display (with the exception of fbc) and rendering + * have their own tiling state bits and don't need fences. + * + * Also note that fences only support X and Y tiling and hence can't be used for + * the fancier new tiling formats like W, Ys and Yf. + * + * Finally note that because fences are such a restricted resource they're + * dynamically associated with objects. Furthermore fence state is committed to + * the hardware lazily to avoid unecessary stalls on gen2/3. Therefore code must + * explictly call i915_gem_object_get_fence() to synchronize fencing status + * for cpu access. Also note that some code wants an unfenced view, for those + * cases the fence can be removed forcefully with i915_gem_object_put_fence(). + * + * Internally these functions will synchronize with userspace access by removing + * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. + */ + static void i965_write_fence_reg(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { @@ -247,6 +277,17 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) return 0; } +/** + * i915_gem_object_put_fence - force-remove fence for an object + * @obj: object to map through a fence reg + * + * This function force-removes any fence from the given object, which is useful + * if the kernel wants to do untiled GTT access. + * + * Returns: + * + * 0 on success, negative error code on failure. + */ int i915_gem_object_put_fence(struct drm_i915_gem_object *obj) { @@ -322,6 +363,10 @@ deadlock: * and tiling format. * * For an untiled surface, this removes any existing fence. + * + * Returns: + * + * 0 on success, negative error code on failure. */ int i915_gem_object_get_fence(struct drm_i915_gem_object *obj) @@ -374,6 +419,21 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } +/** + * i915_gem_object_pin_fence - pin fencing state + * @obj: object to pin fencing for + * + * This pins the fencing state (whether tiled or untiled) to make sure the + * object is ready to be used as a scanout target. Fencing status must be + * synchronize first by calling i915_gem_object_get_fence(): + * + * The resulting fence pin reference must be released again with + * i915_gem_object_unpin_fence(). + * + * Returns: + * + * True if the object has a fence, false otherwise. + */ bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) { @@ -390,6 +450,14 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) return false; } +/** + * i915_gem_object_unpin_fence - unpin fencing state + * @obj: object to unpin fencing for + * + * This releases the fence pin reference acquired through + * i915_gem_object_pin_fence. It will handle both objects with and without an + * attached fence correctly, callers do not need to distinguish this. + */ void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) { @@ -400,6 +468,13 @@ i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) } } +/** + * i915_gem_restore_fences - restore fence state + * @dev: DRM device + * + * Restore the hw fence state to match the software tracking again, to be called + * after a gpu reset and on resume. + */ void i915_gem_restore_fences(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; -- cgit v1.2.3 From e562b9739eda7399d1ec6737bc5f8803f92569e3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 24 Jul 2015 17:40:13 +0200 Subject: drm/i915: Remove bogus kerneldoc include directive Afaict intel_irq_fini never existed. No idea how that one came about. Note: Chris thinks that an irq_fini would be nice and I agree, but this is just to remove some ugly from generated docs. Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 1 - 1 file changed, 1 deletion(-) (limited to 'Documentation/DocBook') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index ee2ed7ed27d1..20c39b7e21ee 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3982,7 +3982,6 @@ int num_ioctls; Interrupt Handling !Pdrivers/gpu/drm/i915/i915_irq.c interrupt handling !Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_init intel_irq_init_hw intel_hpd_init -!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_fini !Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_disable_interrupts !Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_enable_interrupts -- cgit v1.2.3 From 3271dca483a48759cdbdb5f16a13125ab91f6c05 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 24 Jul 2015 17:40:15 +0200 Subject: drm/i915: kerneldoc for tiling IOCTL and swizzle functions Chris rightfully suggested that documenting fences without documenting the BO tiling tracking doesn't make much sense, so fix that. The important bit to stress here (since it lead to some confusion) is the GEM doesn't really care about tiling. Except for a few select cases where the kernel needs to manage something that userspace can't take care of: Namely the limited number of fences and fixing up swizzling, although we still fail at the later. v2: Move the low-level tiling/swizzling functions and kerneldoc to i915_gem_fence.c and leave only the userspace interface here. Suggested by Chris. Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 16 ++++++- drivers/gpu/drm/i915/i915_gem_fence.c | 28 ++++++++++-- drivers/gpu/drm/i915/i915_gem_tiling.c | 84 +++++++++++++++++----------------- 3 files changed, 80 insertions(+), 48 deletions(-) (limited to 'Documentation/DocBook') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 20c39b7e21ee..43042739ae63 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4198,9 +4198,21 @@ int num_ioctls; !Idrivers/gpu/drm/i915/i915_gem_gtt.c - Global GTT Fence Handling -!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling + GTT Fences and Swizzling !Idrivers/gpu/drm/i915/i915_gem_fence.c + + Global GTT Fence Handling +!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling + + + Hardware Tiling and Swizzling Details +!Pdrivers/gpu/drm/i915/i915_gem_fence.c tiling swizzling details + + + + Object Tiling IOCTLs +!Idrivers/gpu/drm/i915/i915_gem_tiling.c +!Pdrivers/gpu/drm/i915/i915_gem_tiling.c buffer object tiling Buffer Object Eviction diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index c643260a90c5..af1f8c461060 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -497,8 +497,7 @@ void i915_gem_restore_fences(struct drm_device *dev) } /** - * - * Support for managing tiling state of buffer objects. + * DOC: tiling swizzling details * * The idea behind tiling is to increase cache hit rates by rearranging * pixel data so that a group of pixel accesses are in the same cacheline. @@ -546,6 +545,9 @@ void i915_gem_restore_fences(struct drm_device *dev) */ /** + * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @dev: DRM device + * * Detects bit 6 swizzling of address lookup between IGD access and CPU * access through main memory. */ @@ -692,7 +694,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) dev_priv->mm.bit_6_swizzle_y = swizzle_y; } -/** +/* * Swap every 64 bytes of this page around, to account for it having a new * bit 17 of its physical address and therefore being interpreted differently * by the GPU. @@ -715,6 +717,18 @@ i915_gem_swizzle_page(struct page *page) kunmap(page); } +/** + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling + * @obj: i915 GEM buffer object + * + * This function fixes up the swizzling in case any page frame number for this + * object has changed in bit 17 since that state has been saved with + * i915_gem_object_save_bit_17_swizzle(). + * + * This is called when pinning backing storage again, since the kernel is free + * to move unpinned backing storage around (either by directly moving pages or + * by swapping them out and back in again). + */ void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) { @@ -737,6 +751,14 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) } } +/** + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling + * @obj: i915 GEM buffer object + * + * This function saves the bit 17 of each page frame number so that swizzling + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must + * be called before the backing storage can be unpinned. + */ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fa7a8d7a24e0..ac3eb566c9d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -31,53 +31,31 @@ #include #include "i915_drv.h" -/** @file i915_gem_tiling.c - * - * Support for managing tiling state of buffer objects. - * - * The idea behind tiling is to increase cache hit rates by rearranging - * pixel data so that a group of pixel accesses are in the same cacheline. - * Performance improvement from doing this on the back/depth buffer are on - * the order of 30%. - * - * Intel architectures make this somewhat more complicated, though, by - * adjustments made to addressing of data when the memory is in interleaved - * mode (matched pairs of DIMMS) to improve memory bandwidth. - * For interleaved memory, the CPU sends every sequential 64 bytes - * to an alternate memory channel so it can get the bandwidth from both. - * - * The GPU also rearranges its accesses for increased bandwidth to interleaved - * memory, and it matches what the CPU does for non-tiled. However, when tiled - * it does it a little differently, since one walks addresses not just in the - * X direction but also Y. So, along with alternating channels when bit - * 6 of the address flips, it also alternates when other bits flip -- Bits 9 - * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) - * are common to both the 915 and 965-class hardware. - * - * The CPU also sometimes XORs in higher bits as well, to improve - * bandwidth doing strided access like we do so frequently in graphics. This - * is called "Channel XOR Randomization" in the MCH documentation. The result - * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address - * decode. +/** + * DOC: buffer object tiling * - * All of this bit 6 XORing has an effect on our memory management, - * as we need to make sure that the 3d driver can correctly address object - * contents. + * i915_gem_set_tiling() and i915_gem_get_tiling() is the userspace interface to + * declare fence register requirements. * - * If we don't have interleaved memory, all tiling is safe and no swizzling is - * required. + * In principle GEM doesn't care at all about the internal data layout of an + * object, and hence it also doesn't care about tiling or swizzling. There's two + * exceptions: * - * When bit 17 is XORed in, we simply refuse to tile at all. Bit - * 17 is not just a page offset, so as we page an objet out and back in, - * individual pages in it will have different bit 17 addresses, resulting in - * each 64 bytes being swapped with its neighbor! + * - For X and Y tiling the hardware provides detilers for CPU access, so called + * fences. Since there's only a limited amount of them the kernel must manage + * these, and therefore userspace must tell the kernel the object tiling if it + * wants to use fences for detiling. + * - On gen3 and gen4 platforms have a swizzling pattern for tiled objects which + * depends upon the physical page frame number. When swapping such objects the + * page frame number might change and the kernel must be able to fix this up + * and hence now the tiling. Note that on a subset of platforms with + * asymmetric memory channel population the swizzling pattern changes in an + * unknown way, and for those the kernel simply forbids swapping completely. * - * Otherwise, if interleaved, we have to tell the 3d driver what the address - * swizzling it needs to do is, since it's writing with the CPU to the pages - * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the - * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling - * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order - * to match what the GPU expects. + * Since neither of this applies for new tiling layouts on modern platforms like + * W, Ys and Yf tiling GEM only allows object tiling to be set to X or Y tiled. + * Anything else can be handled in userspace entirely without the kernel's + * invovlement. */ /* Check pitch constriants for all chips & tiling formats */ @@ -166,8 +144,18 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) } /** + * i915_gem_set_tiling - IOCTL handler to set tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. */ int i915_gem_set_tiling(struct drm_device *dev, void *data, @@ -285,7 +273,17 @@ err: } /** + * i915_gem_get_tiling - IOCTL handler to get tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * * Returns the current tiling mode and required bit 6 swizzling for the object. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. */ int i915_gem_get_tiling(struct drm_device *dev, void *data, -- cgit v1.2.3