From 7ca24cf2d2269bde25e21c02a77fe81995a081ae Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 12 Sep 2017 22:42:14 +0200 Subject: drm/amdgpu: add FENCE_TO_HANDLE ioctl that returns syncobj or sync_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit for being able to convert an amdgpu fence into one of the handles. Mesa will use this. Reviewed-by: Dave Airlie Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 61 ++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c6a214f1e991..ab83dfcabb41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -25,6 +25,7 @@ * Jerome Glisse */ #include +#include #include #include #include @@ -1330,6 +1331,66 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, return fence; } +int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + union drm_amdgpu_fence_to_handle *info = data; + struct dma_fence *fence; + struct drm_syncobj *syncobj; + struct sync_file *sync_file; + int fd, r; + + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; + + fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + switch (info->in.what) { + case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ: + r = drm_syncobj_create(&syncobj, 0, fence); + dma_fence_put(fence); + if (r) + return r; + r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle); + drm_syncobj_put(syncobj); + return r; + + case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD: + r = drm_syncobj_create(&syncobj, 0, fence); + dma_fence_put(fence); + if (r) + return r; + r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle); + drm_syncobj_put(syncobj); + return r; + + case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD: + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + dma_fence_put(fence); + return fd; + } + + sync_file = sync_file_create(fence); + dma_fence_put(fence); + if (!sync_file) { + put_unused_fd(fd); + return -ENOMEM; + } + + fd_install(fd, sync_file->file); + info->out.handle = fd; + return 0; + + default: + return -EINVAL; + } +} + /** * amdgpu_cs_wait_all_fence - wait on all fences to signal * -- cgit v1.2.3 From 177ae09b5d699a5ebd1cafcee78889db968abf54 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Fri, 15 Sep 2017 20:44:06 -0400 Subject: drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. v2: only disable explicit sync in amdgpu_cs_ioctl Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++---- include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 29 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ab83dfcabb41..38027a00f8ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -705,7 +705,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, + amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8e6bb3..21e99366cab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53604af..428aae048f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44312f9..a4bf21f8f1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner) + void *owner, bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc7687993317..70d7e3a279a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner); + void *owner, + bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 10952c3e5eb6..a2282bacf960 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1489,7 +1489,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; @@ -1581,7 +1582,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eb4a01c14eee..c559d76ff695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1035,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1176,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); if (shadow) amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, @@ -1644,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner); + owner, false); if (r) goto error_free; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4c6e8c482ee4..b62484af8ccb 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -91,6 +91,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) /* Flag that BO is always valid in this VM */ #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From b2ff0e8ac4ce1fb647ae40feb4cf26bc9301e0c9 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Mon, 20 Feb 2017 17:53:19 -0500 Subject: drm/amdgpu: add framework for HW specific priority settings v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an initial framework for changing the HW priorities of rings. The framework allows requesting priority changes for the lifetime of an amdgpu_job. After the job completes the priority will decay to the next lowest priority for which a request is still valid. A new ring function set_priority() can now be populated to take care of the HW specific programming sequence for priority changes. v2: set priority before emitting IB, and take a ref on amdgpu_job v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb v5: use atomic for tracking job priorities instead of last_job v6: rename amdgpu_ring_priority_[get/put]() and align parameters v7: replace spinlocks with mutexes for KIQ compatibility v8: raise ring priority during cs_ioctl, instead of job_run v9: priority_get() before push_job() Reviewed-by: Christian König Acked-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 ++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 ++++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++ 5 files changed, 103 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 38027a00f8ab..fe7dd44ac9fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1177,6 +1177,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4510627ae83e..83d13431cbdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -103,6 +103,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->dep_sync); @@ -139,6 +140,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); amd_sched_entity_push_job(&job->base); return 0; @@ -203,6 +206,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 019932a7ea3a..e5ece1fae149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -154,6 +154,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) ring->funcs->end_use(ring); } +/** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(&ring->num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(&ring->priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto out_unlock; + + /* decay priority to the next level with a job available */ + for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == AMD_SCHED_PRIORITY_NORMAL + || atomic_read(&ring->num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Request a ring's priority to be raised to @priority (refcounted). + */ +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + if (!ring->funcs->set_priority) + return; + + atomic_inc(&ring->num_jobs[priority]); + + mutex_lock(&ring->priority_mutex); + if (priority <= ring->priority) + goto out_unlock; + + ring->priority = priority; + ring->funcs->set_priority(ring, priority); + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + /** * amdgpu_ring_init - init driver ring struct. * @@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned max_dw, struct amdgpu_irq_src *irq_src, unsigned irq_type) { - int r; + int r, i; int sched_hw_submission = amdgpu_sched_hw_submission; /* Set the hw submission limit higher for KIQ because @@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + ring->priority = AMD_SCHED_PRIORITY_NORMAL; + mutex_init(&ring->priority_mutex); INIT_LIST_HEAD(&ring->lru_list); amdgpu_ring_lru_touch(adev, ring); + for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) + atomic_set(&ring->num_jobs[i], 0); + if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 491bd5512dcc..0d9ce141404c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -24,6 +24,7 @@ #ifndef __AMDGPU_RING_H__ #define __AMDGPU_RING_H__ +#include #include "gpu_scheduler.h" /* max number of rings */ @@ -56,6 +57,7 @@ struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; struct amdgpu_cs_parser; +struct amdgpu_job; /* * Fences. @@ -147,6 +149,9 @@ struct amdgpu_ring_funcs { void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + /* priority functions */ + void (*set_priority) (struct amdgpu_ring *ring, + enum amd_sched_priority priority); }; struct amdgpu_ring { @@ -187,6 +192,12 @@ struct amdgpu_ring { volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; bool has_compute_vm_bug; + + atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; + struct mutex priority_mutex; + /* protected by priority_mutex */ + int priority; + #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -197,6 +208,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority); +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 38e622ce06de..dbcaa2e1c5c7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -170,4 +170,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity); void amd_sched_job_kickout(struct amd_sched_job *s_job); + +static inline enum amd_sched_priority +amd_sched_get_job_priority(struct amd_sched_job *job) +{ + return (job->s_entity->rq - job->sched->sched_rq); +} + #endif -- cgit v1.2.3 From ad864d243826cedc53404a1c0db7d1e38ddceb84 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 10 Oct 2017 16:50:16 -0400 Subject: drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables old fence waiting before reservation lock is aquired which in turn is part of a bigger solution to deadlock happening when gpu reset with VRAM recovery accures during intensive rendering. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 112 ++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fe7dd44ac9fe..9166d5e1e557 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -845,15 +845,60 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_ring *ring = p->job->ring; - int i, r; + int i, j, r; + + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + if (p->job->ring->funcs->parse_cs) { + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj = NULL; + uint64_t offset; + uint8_t *kptr; + + r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, + &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } - /* Only for UVD/VCE VM emulation */ - if (ring->funcs->parse_cs) { - for (i = 0; i < p->job->num_ibs; i++) { - r = amdgpu_ring_parse_cs(ring, p, i); + if ((chunk_ib->va_start + chunk_ib->ib_bytes) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + offset = m->start * AMDGPU_GPU_PAGE_SIZE; + kptr += chunk_ib->va_start - offset; + + memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); + amdgpu_bo_kunmap(aobj); + + /* Only for UVD/VCE VM emulation */ + r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; + } + + j++; } if (p->job->vm) { @@ -919,54 +964,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring = ring; - if (ring->funcs->parse_cs) { - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - uint64_t offset; - uint8_t *kptr; - - r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, - &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += chunk_ib->va_start - offset; - - r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - } else { - r = amdgpu_ib_get(adev, vm, 0, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - + r = amdgpu_ib_get(adev, vm, + ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, + ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; } ib->gpu_addr = chunk_ib->va_start; ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; + j++; } @@ -1212,6 +1221,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } + r = amdgpu_cs_ib_fill(adev, &parser); + if (r) + goto out; + r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) @@ -1222,9 +1235,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, &parser); - if (r) - goto out; r = amdgpu_cs_dependencies(adev, &parser); if (r) { -- cgit v1.2.3 From 0ae94444c08a0adf2fab4aab26be0646ee445a19 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 10 Oct 2017 16:50:17 -0400 Subject: drm/amdgpu: Move old fence waiting before reservation lock is aquired v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helps avoiding deadlock during GPU reset. Added mutex to amdgpu_ctx to preserve order of fences on a ring. v2: Put waiting logic in a function in a seperate function in amdgpu_ctx.c Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 951c8db01412..76033e2cdba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -738,6 +738,7 @@ struct amdgpu_ctx { bool preamble_presented; enum amd_sched_priority init_priority; enum amd_sched_priority override_priority; + struct mutex lock; }; struct amdgpu_ctx_mgr { @@ -760,9 +761,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); + /* * file private structure */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9166d5e1e557..5de092eab0fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } + mutex_lock(&p->ctx->lock); + /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, dma_fence_put(parser->fence); - if (parser->ctx) + if (parser->ctx) { + mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); + } if (parser->bo_list) amdgpu_bo_list_put(parser->bo_list); @@ -895,9 +899,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; - } - j++; } @@ -985,7 +987,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return 0; + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a78b03f65c69..4309820658c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, if (!ctx->fences) return -ENOMEM; + mutex_init(&ctx->lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) &ctx->rings[i].entity); amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); + + mutex_destroy(&ctx->lock); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, idx = seq & (amdgpu_sched_jobs - 1); other = cring->fences[idx]; - if (other) { - signed long r; - r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - } + if (other) + BUG_ON(!dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) +{ + struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; + unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); + struct dma_fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; + } + } + + return 0; +} + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) { mutex_init(&mgr->lock); -- cgit v1.2.3 From 396bcb41e035df7b98fb150ca950bf213e70ae7b Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 14:45:09 +0200 Subject: drm/amdgpu: partial revert VRAM lost handling v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep blocking the CS, but revert everything else. Mapping BOs and info IOCTL are harmless and can still happen even when VRAM content ist lost. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 11 ----------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 ++++++++++---- 3 files changed, 10 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5de092eab0fa..0c07df72743c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1272,16 +1272,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; @@ -1350,16 +1346,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_fence_to_handle *info = data; struct dma_fence *fence; struct drm_syncobj *syncobj; struct sync_file *sync_file; int fd, r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -1521,15 +1513,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; /* Get the fences from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 21e99366cab3..fb72edc4c026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -579,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } - if ((args->operation == AMDGPU_VA_OP_MAP) || - (args->operation == AMDGPU_VA_OP_REPLACE)) { - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - } INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&duplicates); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 82e8d43b235a..f759836d10ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -270,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; @@ -283,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -792,10 +789,19 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } +/** + * amdgpu_kms_vram_lost - check if VRAM was lost for this client + * + * @adev: amdgpu device + * @fpriv: client private + * + * Check if all CS is blocked for the client because of lost VRAM + */ bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) { - return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); + return fpriv->vram_lost_counter != + atomic_read(&adev->vram_lost_counter); } /** -- cgit v1.2.3 From 14e47f93c5cc4a1237dbacc137e174706093b69c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 15:04:41 +0200 Subject: drm/amdgpu: keep copy of VRAM lost counter in job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of reading the current counter from fpriv. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 +++++++------ 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 76033e2cdba8..aa70f8c045b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1125,6 +1125,7 @@ struct amdgpu_job { uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; + uint32_t vram_lost_counter; /* user fence handling */ uint64_t uf_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0c07df72743c..9daa7cac0ffb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -172,6 +172,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; + p->job->vram_lost_counter = fpriv->vram_lost_counter; + if (p->uf_entry.robj) p->job->uf_addr = uf_offset; kfree(chunk_array); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 83d13431cbdd..4f2b5acc8743 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -61,6 +61,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; + (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->dep_sync); @@ -180,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; + struct amdgpu_device *adev; struct amdgpu_job *job; - struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -189,17 +190,17 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); + adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - if (job->vm) - fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); /* skip ib schedule when vram is lost */ - if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { DRM_ERROR("Skip scheduling IBs!\n"); - else { - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + } else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, + &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } -- cgit v1.2.3 From e55f2b646df3318e24f12b8388ab6e5cccb3e92d Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 15:18:43 +0200 Subject: drm/amdgpu: move the VRAM lost counter per context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of per device track the VRAM lost per context and return ECANCELED instead of ENODEV. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 ---------------- 4 files changed, 8 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index aa70f8c045b1..67b864436be1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -732,10 +732,11 @@ struct amdgpu_ctx { struct amdgpu_device *adev; struct amdgpu_queue_mgr queue_mgr; unsigned reset_counter; + uint32_t vram_lost_counter; spinlock_t ring_lock; struct dma_fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; - bool preamble_presented; + bool preamble_presented; enum amd_sched_priority init_priority; enum amd_sched_priority override_priority; struct mutex lock; @@ -778,7 +779,6 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; - u32 vram_lost_counter; }; /* @@ -1860,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; } extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const int amdgpu_max_kms_ioctl; -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); void amdgpu_driver_unload_kms(struct drm_device *dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9daa7cac0ffb..b355189533d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -172,7 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; - p->job->vram_lost_counter = fpriv->vram_lost_counter; + p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter); + if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + ret = -ECANCELED; + goto free_all_kdata; + } if (p->uf_entry.robj) p->job->uf_addr = uf_offset; @@ -1205,7 +1209,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; @@ -1213,8 +1216,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; parser.adev = adev; parser.filp = filp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 4309820658c4..c184468e2b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -75,6 +75,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, } ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f759836d10ef..ff1a416a66c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -789,21 +789,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } -/** - * amdgpu_kms_vram_lost - check if VRAM was lost for this client - * - * @adev: amdgpu device - * @fpriv: client private - * - * Check if all CS is blocked for the client because of lost VRAM - */ -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv) -{ - return fpriv->vram_lost_counter != - atomic_read(&adev->vram_lost_counter); -} - /** * amdgpu_driver_open_kms - drm callback for open * @@ -860,7 +845,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); - fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: -- cgit v1.2.3 From 7a0a48ddf63bc9944b9690c6fa043ea4305f7f79 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 15:51:10 +0200 Subject: drm/amdgpu: set -ECANCELED when dropping jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And return from the wait functions the fence error code. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b355189533d2..2ae5d523ca10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1298,6 +1298,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, r = PTR_ERR(fence); else if (fence) { r = dma_fence_wait_timeout(fence, true, timeout); + if (r > 0 && fence->error) + r = fence->error; dma_fence_put(fence); } else r = 1; @@ -1435,6 +1437,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, if (r == 0) break; + + if (fence->error) + return fence->error; } memset(wait, 0, sizeof(*wait)); @@ -1495,7 +1500,7 @@ out: wait->out.status = (r > 0); wait->out.first_signaled = first; /* set return value 0 to indicate success */ - r = 0; + r = array[first]->error; err_free_fence_array: for (i = 0; i < fence_count; i++) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4f2b5acc8743..a8357885776e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -197,6 +197,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) trace_amdgpu_sched_run_job(job); /* skip ib schedule when vram is lost */ if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { + dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); DRM_ERROR("Skip scheduling IBs!\n"); } else { r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, -- cgit v1.2.3 From 26eedf6daec4e7937c8f0f1dde5e9b8e3dcebfd3 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 11 Oct 2017 17:02:02 -0400 Subject: drm/amdgpu: Fix extra call to amdgpu_ctx_put. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In amdgpu_cs_parser_init() in case of error handling amdgpu_ctx_put() is called without setting p->ctx to NULL after that, later amdgpu_cs_parser_fini() also calls amdgpu_ctx_put() again and mess up the reference count. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2ae5d523ca10..dfd37785563f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -97,7 +97,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; - goto put_ctx; + goto free_chunk; } p->nchunks = cs->in.num_chunks; @@ -105,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) GFP_KERNEL); if (!p->chunks) { ret = -ENOMEM; - goto put_ctx; + goto free_chunk; } for (i = 0; i < p->nchunks; i++) { @@ -191,8 +191,6 @@ free_partial_kdata: kfree(p->chunks); p->chunks = NULL; p->nchunks = 0; -put_ctx: - amdgpu_ctx_put(p->ctx); free_chunk: kfree(chunk_array); -- cgit v1.2.3 From c5795c555bbaca51192ffc6164bb85845ecdf717 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 12 Oct 2017 12:16:33 +0200 Subject: drm/amdgpu: minor CS optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need to loop over all IBs for old UVD/VCE command stream patching. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dfd37785563f..52dd78ee8fd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -853,36 +853,37 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_ring *ring = p->job->ring; - int i, j, r; - - for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { - - struct amdgpu_cs_chunk *chunk; - struct amdgpu_ib *ib; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - - chunk = &p->chunks[i]; - ib = &p->job->ibs[j]; - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + int r; - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; + /* Only for UVD/VCE VM emulation */ + if (p->job->ring->funcs->parse_cs) { + unsigned i, j; - if (p->job->ring->funcs->parse_cs) { + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + struct drm_amdgpu_cs_chunk_ib *chunk_ib; struct amdgpu_bo_va_mapping *m; struct amdgpu_bo *aobj = NULL; + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; uint64_t offset; uint8_t *kptr; + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, - &aobj, &m); + &aobj, &m); if (r) { DRM_ERROR("IB va_start is invalid\n"); return r; } if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("IB va_start+ib_bytes is invalid\n"); return -EINVAL; } @@ -899,12 +900,12 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); - /* Only for UVD/VCE VM emulation */ r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; + + j++; } - j++; } if (p->job->vm) { -- cgit v1.2.3 From c70b78a71e9a283240f72dfdfff8fd2388db51da Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 16 Oct 2017 20:02:08 +0800 Subject: drm/amdgpu:fix duplicated setting job's vram_lost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52dd78ee8fd0..32cf83e2f2d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -172,7 +172,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; - p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter); if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index a8357885776e..0cfc68db575b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -61,11 +61,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; - (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->dep_sync); amdgpu_sync_create(&(*job)->sched_sync); + (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); return 0; } -- cgit v1.2.3 From 4b6b691ee38abae8842aed61d442dfb315c45789 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 16 Oct 2017 10:32:04 +0200 Subject: drm/amdgpu: linear validate first then bind to GART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For VM emulation for old UVD/VCE we need to validate the BO with linear VRAM flag set first and then eventually bind it to GART. Validating with linear VRAM flag set can move the BO to GART making UVD/VCE read/write from an unbound GART BO. Signed-off-by: Christian König Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 32cf83e2f2d9..f7fceb63413c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1582,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) return -EINVAL; - r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); - if (unlikely(r)) - return r; - - if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - return 0; + if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, + false); + if (r) + return r; + } - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); - return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); + return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); } -- cgit v1.2.3