diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-09 18:48:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-09 18:48:37 -0700 |
commit | af3c8d98508d37541d4bf57f13a984a7f73a328c (patch) | |
tree | e8dd974d6ebccd38b1e373be8a5e4a2f8bf3c6ce /drivers/gpu/host1x/job.c | |
parent | d3e3b7eac886fb1383db2f22b81550fa6d87f62f (diff) | |
parent | 00fc2c26bc46a64545cdf95a1511461ea9acecb4 (diff) | |
download | linux-af3c8d98508d37541d4bf57f13a984a7f73a328c.tar.bz2 |
Merge tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main pull request for drm. I think I've got one later
driver pull for the mediatek SoC driver, but I'm undecided on whether
it needs to go to you yet.
Otherwise summary below:
Core drm:
- Atomic add driver private objects
- Deprecate preclose hook in modern drivers
- MST bandwidth tracking
- Use kvmalloc in more places
- Add mode_valid hook for crtc/encoder/bridge
- Reduce sync_file construction time
- Documentation updates
- New DRM synchronisation object support
New drivers:
- pl111 - pl111 CLCD display controller
Panel:
- Innolux P079ZCA panel driver
- Add NL12880B20-05, NL192108AC18-02D, P320HVN03 panels
- panel-samsung-s6e3ha2: Add s6e3hf2 panel support
i915:
- SKL+ watermark fixes
- G4x/G33 reset improvements
- DP AUX backlight improvements
- Buffer based GuC/host communication
- New getparam for (sub)slice information
- Cannonlake and Coffeelake initial patches
- Execbuf optimisations
radeon/amdgpu:
- Lots of Vega10 bug fixes
- Preliminary raven support
- KIQ support for compute rings
- MEC queue management rework
- DCE6 Audio support
- SR-IOV improvements
- Better radeon/amdgpu selection support
nouveau:
- HDMI stereoscopic support
- Display code rework for >= GM20x GPUs
msm:
- GEM rework for fine-grained locking
- Per-process pagetable work
- HDMI fixes for Snapdragon 820.
vc4:
- Remove 256MB CMA limit from vc4
- Add out-fence support
- Add support for cygnus
- Get/set tiling ioctls support
- Add T-format tiling support for scanout
zte:
- add VGA support.
etnaviv:
- Thermal throttle support for newer GPUs
- Restore userspace buffer cache performance
- dma-buf sync fix
stm:
- add stm32f429 display support
exynos:
- Rework vblank handling
- Fixup sw-trigger code
sun4i:
- V3s display engine support
- HDMI support for older SoCs
- Preliminary work on dual-pipeline SoCs.
rcar-du:
- VSP work
imx-drm:
- Remove counter load enable from PRE
- Double read/write reduction flag support
tegra:
- Documentation for the host1x and drm driver.
- Lots of staging ioctl fixes due to grate project work.
omapdrm:
- dma-buf fence support
- TILER rotation fixes"
* tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux: (1270 commits)
drm: Remove unused drm_file parameter to drm_syncobj_replace_fence()
drm/amd/powerplay: fix bug fail to remove sysfs when rmmod amdgpu.
amdgpu: Set cik/si_support to 1 by default if radeon isn't built
drm/amdgpu/gfx9: fix driver reload with KIQ
drm/amdgpu/gfx8: fix driver reload with KIQ
drm/amdgpu: Don't call amd_powerplay_destroy() if we don't have powerplay
drm/ttm: Fix use-after-free in ttm_bo_clean_mm
drm/amd/amdgpu: move get memory type function from early init to sw init
drm/amdgpu/cgs: always set reference clock in mode_info
drm/amdgpu: fix vblank_time when displays are off
drm/amd/powerplay: power value format change for Vega10
drm/amdgpu/gfx9: support the amdgpu.disable_cu option
drm/amd/powerplay: change PPSMC_MSG_GetCurrPkgPwr for Vega10
drm/amdgpu: Make amdgpu_cs_parser_init static (v2)
drm/amdgpu/cs: fix a typo in a comment
drm/amdgpu: Fix the exported always on CU bitmap
drm/amdgpu/gfx9: gfx_v9_0_enable_gfx_static_mg_power_gating() can be static
drm/amdgpu/psp: upper_32_bits/lower_32_bits for address setup
drm/amd/powerplay/cz: print message if smc message fails
drm/amdgpu: fix typo in amdgpu_debugfs_test_ib_init
...
Diffstat (limited to 'drivers/gpu/host1x/job.c')
-rw-r--r-- | drivers/gpu/host1x/job.c | 124 |
1 files changed, 98 insertions, 26 deletions
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 5f5f8ee6143d..bee504406cfc 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -31,6 +31,8 @@ #include "job.h" #include "syncpt.h" +#define HOST1X_WAIT_SYNCPT_OFFSET 0x8 + struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, u32 num_cmdbufs, u32 num_relocs, u32 num_waitchks) @@ -137,8 +139,9 @@ static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp, * avoid a wrap condition in the HW). */ static int do_waitchks(struct host1x_job *job, struct host1x *host, - struct host1x_bo *patch) + struct host1x_job_gather *g) { + struct host1x_bo *patch = g->bo; int i; /* compare syncpt vs wait threshold */ @@ -165,7 +168,8 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host, wait->syncpt_id, sp->name, wait->thresh, host1x_syncpt_read_min(sp)); - host1x_syncpt_patch_offset(sp, patch, wait->offset); + host1x_syncpt_patch_offset(sp, patch, + g->offset + wait->offset); } wait->bo = NULL; @@ -269,11 +273,12 @@ unpin: return err; } -static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) +static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) { int i = 0; u32 last_page = ~0; void *cmdbuf_page_addr = NULL; + struct host1x_bo *cmdbuf = g->bo; /* pin & patch the relocs for one gather */ for (i = 0; i < job->num_relocs; i++) { @@ -286,6 +291,13 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) if (cmdbuf != reloc->cmdbuf.bo) continue; + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + target = (u32 *)job->gather_copy_mapped + + reloc->cmdbuf.offset / sizeof(u32) + + g->offset / sizeof(u32); + goto patch_reloc; + } + if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { if (cmdbuf_page_addr) host1x_bo_kunmap(cmdbuf, last_page, @@ -302,6 +314,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) } target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); +patch_reloc: *target 
= reloc_addr; } @@ -319,6 +332,21 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset) return false; + /* relocation shift value validation isn't implemented yet */ + if (reloc->shift) + return false; + + return true; +} + +static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf, + unsigned int offset) +{ + offset *= sizeof(u32); + + if (wait->bo != cmdbuf || wait->offset != offset) + return false; + return true; } @@ -329,6 +357,9 @@ struct host1x_firewall { unsigned int num_relocs; struct host1x_reloc *reloc; + unsigned int num_waitchks; + struct host1x_waitchk *waitchk; + struct host1x_bo *cmdbuf; unsigned int offset; @@ -341,6 +372,9 @@ struct host1x_firewall { static int check_register(struct host1x_firewall *fw, unsigned long offset) { + if (!fw->job->is_addr_reg) + return 0; + if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) { if (!fw->num_relocs) return -EINVAL; @@ -352,6 +386,33 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset) fw->reloc++; } + if (offset == HOST1X_WAIT_SYNCPT_OFFSET) { + if (fw->class != HOST1X_CLASS_HOST1X) + return -EINVAL; + + if (!fw->num_waitchks) + return -EINVAL; + + if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset)) + return -EINVAL; + + fw->num_waitchks--; + fw->waitchk++; + } + + return 0; +} + +static int check_class(struct host1x_firewall *fw, u32 class) +{ + if (!fw->job->is_valid_class) { + if (fw->class != class) + return -EINVAL; + } else { + if (!fw->job->is_valid_class(fw->class)) + return -EINVAL; + } + return 0; } @@ -428,11 +489,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) { u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped + (g->offset / sizeof(u32)); + u32 job_class = fw->class; int err = 0; - if (!fw->job->is_addr_reg) - return 0; - fw->words = g->words; fw->cmdbuf = g->bo; fw->offset = 0; @@ -452,7 +511,9 @@ static int 
validate(struct host1x_firewall *fw, struct host1x_job_gather *g) fw->class = word >> 6 & 0x3ff; fw->mask = word & 0x3f; fw->reg = word >> 16 & 0xfff; - err = check_mask(fw); + err = check_class(fw, job_class); + if (!err) + err = check_mask(fw); if (err) goto out; break; @@ -480,7 +541,6 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) goto out; break; case 4: - case 5: case 14: break; default: @@ -504,7 +564,9 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) fw.dev = dev; fw.reloc = job->relocarray; fw.num_relocs = job->num_relocs; - fw.class = 0; + fw.waitchk = job->waitchk; + fw.num_waitchks = job->num_waitchk; + fw.class = job->class; for (i = 0; i < job->num_gathers; i++) { struct host1x_job_gather *g = &job->gathers[i]; @@ -512,12 +574,20 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) size += g->words * sizeof(u32); } + /* + * Try a non-blocking allocation from a higher priority pools first, + * as awaiting for the allocation here is a major performance hit. 
+ */ job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, - GFP_KERNEL); - if (!job->gather_copy_mapped) { - job->gather_copy_mapped = NULL; + GFP_NOWAIT); + + /* the higher priority allocation failed, try the generic-blocking */ + if (!job->gather_copy_mapped) + job->gather_copy_mapped = dma_alloc_wc(dev, size, + &job->gather_copy, + GFP_KERNEL); + if (!job->gather_copy_mapped) return -ENOMEM; - } job->gather_copy_size = size; @@ -542,8 +612,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) offset += g->words * sizeof(u32); } - /* No relocs should remain at this point */ - if (fw.num_relocs) + /* No relocs and waitchks should remain at this point */ + if (fw.num_relocs || fw.num_waitchks) return -EINVAL; return 0; @@ -573,6 +643,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (err) goto out; + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + err = copy_gathers(job, dev); + if (err) + goto out; + } + /* patch gathers */ for (i = 0; i < job->num_gathers; i++) { struct host1x_job_gather *g = &job->gathers[i]; @@ -581,7 +657,9 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (g->handled) continue; - g->base = job->gather_addr_phys[i]; + /* copy_gathers() sets gathers base if firewall is enabled */ + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + g->base = job->gather_addr_phys[i]; for (j = i + 1; j < job->num_gathers; j++) { if (job->gathers[j].bo == g->bo) { @@ -590,24 +668,18 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) } } - err = do_relocs(job, g->bo); + err = do_relocs(job, g); if (err) break; - err = do_waitchks(job, host, g->bo); + err = do_waitchks(job, host, g); if (err) break; } - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) { - err = copy_gathers(job, dev); - if (err) { - host1x_job_unpin(job); - return err; - } - } - out: + if (err) + host1x_job_unpin(job); wmb(); return err; |