diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 20:42:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 20:42:10 -0800 |
commit | e60e1ee60630cafef5e430c2ae364877e061d980 (patch) | |
tree | 816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2 /drivers/gpu/drm/msm/msm_gpu.c | |
parent | 5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a (diff) | |
parent | f150891fd9878ef0d9197c4e8451ce67c3bdd014 (diff) | |
download | linux-e60e1ee60630cafef5e430c2ae364877e061d980.tar.bz2 |
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main drm pull request for v4.15.
Core:
- Atomic object lifetime fixes
- Atomic iterator improvements
- Sparse/smatch fixes
- Legacy kms ioctls to be interruptible
- EDID override improvements
- fb/gem helper cleanups
- Simple outreachy patches
- Documentation improvements
- Fix dma-buf rcu races
- DRM mode object leasing for improving VR use cases.
- vgaarb improvements for non-x86 platforms.
New driver:
- tve200: Faraday Technology TVE200 block.
This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in
the StorLink SL3516 (later Cortina Systems CS3516) as well as the
Grain Media GM8180.
New bridges:
- SiI9234 support
New panels:
- S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba
LT089AC19000, Innolux AT043TN24
i915:
- Remove Coffeelake from alpha support
- Cannonlake workarounds
- Infoframe refactoring for DisplayPort
- VBT updates
- DisplayPort vswing/emph/buffer translation refactoring
- CCS fixes
- Restore GPU clock boost on missed vblanks
- Scatter list updates for userptr allocations
- Gen9+ transition watermarks
- Display IPC (Isochronous Priority Control)
- Private PAT management
- GVT: improved error handling and pci config sanitizing
- Execlist refactoring
- Transparent Huge Page support
- User defined priorities support
- HuC/GuC firmware refactoring
- DP MST fixes
- eDP power sequencing fixes
- Use RCU instead of stop_machine
- PSR state tracking support
- Eviction fixes
- BDW DP aux channel timeout fixes
- LSPCON fixes
- Cannonlake PLL fixes
amdgpu:
- Per VM BO support
- Powerplay cleanups
- CI powerplay support
- PASID mgr for kfd
- SR-IOV fixes
- initial GPU reset for vega10
- Prime mmap support
- TTM updates
- Clock query interface for Raven
- Fence to handle ioctl
- UVD encode ring support on Polaris
- Transparent huge page DMA support
- Compute LRU pipe tweaks
- BO flag to allow buffers to opt out of implicit sync
- CTX priority setting API
- VRAM lost infrastructure plumbing
qxl:
- fix flicker since atomic rework
amdkfd:
- Further improvements from internal AMD tree
- Usermode events
- Drop radeon support
nouveau:
- Pascal temperature sensor support
- Improved BAR2 handling
- MMU rework to support Pascal MMU
exynos:
- Improved HDMI/mixer support
- HDMI audio interface support
tegra:
- Prep work for tegra186
- Cleanup/fixes
msm:
- Preemption support for a5xx
- Display fixes for 8x96 (snapdragon 820)
- Async cursor plane fixes
- FW loading rework
- GPU debugging improvements
vc4:
- Prep for DSI panels
- fix T-format tiling scanout
- New madvise ioctl
Rockchip:
- LVDS support
omapdrm:
- omap4 HDMI CEC support
etnaviv:
- GPU performance counters groundwork
sun4i:
- refactor driver load + TCON backend
- HDMI improvements
- A31 support
- Misc fixes
udl:
- Probe/EDID read fixes.
tilcdc:
- Misc fixes.
pl111:
- Support more variants
adv7511:
- Improve EDID handling.
- HDMI CEC support
sii8620:
- Add remote control support"
* tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits)
drm/rockchip: analogix_dp: Use mutex rather than spinlock
drm/mode_object: fix documentation for object lookups.
drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
drm/i915: Move init_clock_gating() back to where it was
drm/i915: Prune the reservation shared fence array
drm/i915: Idle the GPU before shinking everything
drm/i915: Lock llist_del_first() vs llist_del_all()
drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
drm/i915: Disable lazy PPGTT page table optimization for vGPU
drm/i915/execlists: Remove the priority "optimisation"
drm/i915: Filter out spurious execlists context-switch interrupts
drm/amdgpu: use irq-safe lock for kiq->ring_lock
drm/amdgpu: bypass lru touch for KIQ ring submission
drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories()
drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs()
drm/amd/powerplay: initialize a variable before using it
drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels
drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug
drm/rockchip: add CONFIG_OF dependency for lvds
...
Diffstat (limited to 'drivers/gpu/drm/msm/msm_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 235 |
1 files changed, 177 insertions, 58 deletions
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 6a887032c66a..8d4477818ec2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -20,6 +20,8 @@ #include "msm_mmu.h" #include "msm_fence.h" +#include <linux/string_helpers.h> + /* * Power Management: @@ -221,33 +223,102 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + +static struct msm_gem_submit * +find_submit(struct msm_ringbuffer *ring, uint32_t fence) +{ + struct msm_gem_submit *submit; + + WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); + + list_for_each_entry(submit, &ring->submits, node) + if (submit->seqno == fence) + return submit; + + return NULL; +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); - - msm_update_fence(gpu->fctx, fence + 1); + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + int i; mutex_lock(&dev->struct_mutex); dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(submit->pid, PIDTYPE_PID); - if (task) { - dev_err(dev->dev, "%s: offending task: %s\n", - gpu->name, task->comm); - } - rcu_read_unlock(); - break; + + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); + if (submit) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + char *cmd; + + /* + * So slightly annoying, in other paths like + * mmap'ing gem buffers, mmap_sem is acquired + * before struct_mutex, which means we can't + * hold struct_mutex across the call to + * get_cmdline(). But submits are retired + * from the same in-order workqueue, so we can + * safely drop the lock here without worrying + * about the submit going away. + */ + mutex_unlock(&dev->struct_mutex); + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); + mutex_lock(&dev->struct_mutex); + + dev_err(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, task->comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", task->comm, cmd); + } else { + msm_rd_dump_submit(priv->hangrd, submit, NULL); } + rcu_read_unlock(); + } + + + /* + * Update all the rings with the latest and greatest fence.. this + * needs to happen after msm_rd_dump_submit() to ensure that the + * bo's referenced by the offending submit are still around. + */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + uint32_t fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence++; + + update_fences(gpu, ring, fence); } if (msm_gpu_active(gpu)) { @@ -258,9 +329,15 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +358,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! */ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +507,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; - - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); + /* Retire the commits starting with highest priority */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +527,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->funcs->last_fence(gpu); + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +551,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,9 +560,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; + + list_add_tail(&submit->node, &ring->submits); - msm_rd_dump_submit(submit); + msm_rd_dump_submit(priv->rd, submit, NULL); update_sw_cntrs(gpu); @@ -605,7 +687,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +697,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,34 +766,76 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, + &memptrs_iova); + + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); + } + + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + platform_set_drvdata(pdev, NULL); return ret; } void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); } if (!IS_ERR_OR_NULL(gpu->aspace)) { |