summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2021-06-09 13:51:36 +0200
committerChristian König <christian.koenig@amd.com>2021-06-22 11:05:05 +0200
commit8c505bdc9c8b955223b054e34a0be9c3d841cd20 (patch)
tree8c250885aa9872de706787183d97fdac3bd059a1 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent22f0463ae606a34134fdabacd27ede59e535a7cc (diff)
downloadlinux-8c505bdc9c8b955223b054e34a0be9c3d841cd20.tar.bz2
drm/amdgpu: rework dma_resv handling v3
Drop the workaround and instead implement a better solution. Basically we are now chaining all submissions using a dma_fence_chain container and adding them as exclusive fence to the dma_resv object. This way other drivers can still sync to the single exclusive fence while amdgpu only sync to fences from different processes. v3: add the shared fence first before the exclusive one Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210614174536.5188-2-christian.koenig@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c62
1 files changed, 51 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9ce649a1a8d3..25655414e9c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out;
}
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+ if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+ e->chain = dma_fence_chain_alloc();
+ if (!e->chain) {
+ r = -ENOMEM;
+ goto error_validate;
+ }
+ }
+ }
+
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
@@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- /* Make sure we use the exclusive slot for shared BOs */
- if (bo->prime_shared_count)
- e->tv.num_shared = 0;
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
- }
-
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
error_validate:
- if (r)
+ if (r) {
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+ }
out:
return r;
}
@@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
+ struct amdgpu_bo_list_entry *e;
+
+ amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
+
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_resv *resv = e->tv.bo->base.resv;
+ struct dma_fence_chain *chain = e->chain;
+
+ if (!chain)
+ continue;
+
+ /*
+ * Work around dma_resv shortcommings by wrapping up the
+ * submission in a dma_fence_chain and add it as exclusive
+ * fence, but first add the submission as shared fence to make
+ * sure that shared fences never signal before the exclusive
+ * one.
+ */
+ dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+ dma_fence_get(p->fence), 1);
+
+ dma_resv_add_shared_fence(resv, p->fence);
+ rcu_assign_pointer(resv->fence_excl, &chain->base);
+ e->chain = NULL;
+ }
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);