summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2019-11-11 14:42:13 +0100
committerChristian König <christian.koenig@amd.com>2020-02-12 13:03:27 +0100
commit1ec39923ef24f55eaa12cbdbdd9bb7e4ba29f6a7 (patch)
treedb1e8f34b59de4896c52a252fd785cdef0510907
parentb1df3a2b24a917f8853d43fe9683c0e360d2c33a (diff)
downloadlinux-1ec39923ef24f55eaa12cbdbdd9bb7e4ba29f6a7.tar.bz2
drm/ttm: rework BO delayed delete. v2
This patch reworks the whole delayed deletion of BOs which aren't idle. Instead of having two counters for the BO structure we resurrect the BO when we find that a deleted BO is not idle yet. This has many advantages, especially that we don't need to increment/decrement the BOs reference counter any more when it moves on the LRUs. v2: remove duplicate ttm_tt_destroy, fix holde lock for LRU move Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: xinhui pan <xinhui.pan@amd.com> Link: https://patchwork.freedesktop.org/patch/352912/
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c217
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c1
-rw-r--r--include/drm/ttm/ttm_bo_api.h11
3 files changed, 97 insertions, 132 deletions
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e12fc2c2d165..28fd91ca6806 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type)
return 1 << (type);
}
-static void ttm_bo_release_list(struct kref *list_kref)
-{
- struct ttm_buffer_object *bo =
- container_of(list_kref, struct ttm_buffer_object, list_kref);
- size_t acc_size = bo->acc_size;
-
- BUG_ON(kref_read(&bo->list_kref));
- BUG_ON(kref_read(&bo->kref));
- BUG_ON(bo->mem.mm_node != NULL);
- BUG_ON(!list_empty(&bo->lru));
- BUG_ON(!list_empty(&bo->ddestroy));
- ttm_tt_destroy(bo->ttm);
- atomic_dec(&ttm_bo_glob.bo_count);
- dma_fence_put(bo->moving);
- if (!ttm_bo_uses_embedded_gem_object(bo))
- dma_resv_fini(&bo->base._resv);
- bo->destroy(bo);
- ttm_mem_global_free(&ttm_mem_glob, acc_size);
-}
-
static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem)
{
@@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
man = &bdev->man[mem->mem_type];
list_add_tail(&bo->lru, &man->lru[bo->priority]);
- kref_get(&bo->list_kref);
if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
!(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
TTM_PAGE_FLAG_SWAPPED))) {
list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]);
- kref_get(&bo->list_kref);
}
}
-static void ttm_bo_ref_bug(struct kref *list_kref)
-{
- BUG();
-}
-
static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
@@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
if (!list_empty(&bo->swap)) {
list_del_init(&bo->swap);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
notify = true;
}
if (!list_empty(&bo->lru)) {
list_del_init(&bo->lru);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
notify = true;
}
@@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
BUG_ON(!dma_resv_trylock(&bo->base._resv));
r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
- if (r)
- dma_resv_unlock(&bo->base._resv);
+ dma_resv_unlock(&bo->base._resv);
return r;
}
@@ -449,68 +419,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
rcu_read_unlock();
}
-static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
-{
- struct ttm_bo_device *bdev = bo->bdev;
- int ret;
-
- ret = ttm_bo_individualize_resv(bo);
- if (ret) {
- /* Last resort, if we fail to allocate memory for the
- * fences block for the BO to become idle
- */
- dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
- 30 * HZ);
- spin_lock(&ttm_bo_glob.lru_lock);
- goto error;
- }
-
- spin_lock(&ttm_bo_glob.lru_lock);
- ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
- if (!ret) {
- if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
- ttm_bo_del_from_lru(bo);
- spin_unlock(&ttm_bo_glob.lru_lock);
- if (bo->base.resv != &bo->base._resv)
- dma_resv_unlock(&bo->base._resv);
-
- ttm_bo_cleanup_memtype_use(bo);
- dma_resv_unlock(bo->base.resv);
- return;
- }
-
- ttm_bo_flush_all_fences(bo);
-
- /*
- * Make NO_EVICT bos immediately available to
- * shrinkers, now that they are queued for
- * destruction.
- */
- if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
- bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
- ttm_bo_move_to_lru_tail(bo, NULL);
- }
-
- dma_resv_unlock(bo->base.resv);
- }
- if (bo->base.resv != &bo->base._resv) {
- ttm_bo_flush_all_fences(bo);
- dma_resv_unlock(&bo->base._resv);
- }
-
-error:
- kref_get(&bo->list_kref);
- list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_bo_glob.lru_lock);
-
- schedule_delayed_work(&bdev->wq,
- ((HZ / 100) < 1) ? 1 : HZ / 100);
-}
-
/**
* function ttm_bo_cleanup_refs
- * If bo idle, remove from delayed- and lru lists, and unref.
- * If not idle, do nothing.
+ * If bo idle, remove from lru lists, and unref.
+ * If not idle, block if possible.
*
* Must be called with lru_lock and reservation held, this function
* will drop the lru lock and optionally the reservation lock before returning.
@@ -572,14 +484,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
-
spin_unlock(&ttm_bo_glob.lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
+ ttm_bo_put(bo);
+
return 0;
}
@@ -601,8 +513,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
ddestroy);
- kref_get(&bo->list_kref);
list_move_tail(&bo->ddestroy, &removed);
+ if (!ttm_bo_get_unless_zero(bo))
+ continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
@@ -617,7 +530,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
spin_unlock(&glob->lru_lock);
}
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
@@ -643,16 +556,68 @@ static void ttm_bo_release(struct kref *kref)
container_of(kref, struct ttm_buffer_object, kref);
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+ size_t acc_size = bo->acc_size;
+ int ret;
- if (bo->bdev->driver->release_notify)
- bo->bdev->driver->release_notify(bo);
+ if (!bo->deleted) {
+ if (bo->bdev->driver->release_notify)
+ bo->bdev->driver->release_notify(bo);
- drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
- ttm_mem_io_lock(man, false);
- ttm_mem_io_free_vm(bo);
- ttm_mem_io_unlock(man);
- ttm_bo_cleanup_refs_or_queue(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
+ ttm_mem_io_lock(man, false);
+ ttm_mem_io_free_vm(bo);
+ ttm_mem_io_unlock(man);
+
+ ret = ttm_bo_individualize_resv(bo);
+ if (ret) {
+ /* Last resort, if we fail to allocate memory for the
+ * fences block for the BO to become idle
+ */
+ dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
+ 30 * HZ);
+ }
+ }
+
+ if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
+ /* The BO is not idle, resurrect it for delayed destroy */
+ ttm_bo_flush_all_fences(bo);
+ bo->deleted = true;
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+
+ /*
+ * Make NO_EVICT bos immediately available to
+ * shrinkers, now that they are queued for
+ * destruction.
+ */
+ if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
+ bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
+ ttm_bo_move_to_lru_tail(bo, NULL);
+ }
+
+ kref_init(&bo->kref);
+ list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ schedule_delayed_work(&bdev->wq,
+ ((HZ / 100) < 1) ? 1 : HZ / 100);
+ return;
+ }
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+ ttm_bo_del_from_lru(bo);
+ list_del(&bo->ddestroy);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ ttm_bo_cleanup_memtype_use(bo);
+
+ BUG_ON(bo->mem.mm_node != NULL);
+ atomic_dec(&ttm_bo_glob.bo_count);
+ dma_fence_put(bo->moving);
+ if (!ttm_bo_uses_embedded_gem_object(bo))
+ dma_resv_fini(&bo->base._resv);
+ bo->destroy(bo);
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
}
void ttm_bo_put(struct ttm_buffer_object *bo)
@@ -755,8 +720,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->base.resv == ctx->resv) {
dma_resv_assert_held(bo->base.resv);
- if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
- || !list_empty(&bo->ddestroy))
+ if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
ret = true;
*locked = false;
if (busy)
@@ -837,6 +801,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
dma_resv_unlock(bo->base.resv);
continue;
}
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ continue;
+ }
break;
}
@@ -848,21 +817,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
}
if (!bo) {
- if (busy_bo)
- kref_get(&busy_bo->list_kref);
+ if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
+ busy_bo = NULL;
spin_unlock(&ttm_bo_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
- kref_put(&busy_bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(busy_bo);
return ret;
}
- kref_get(&bo->list_kref);
-
- if (!list_empty(&bo->ddestroy)) {
+ if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
ctx->no_wait_gpu, locked);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -872,7 +839,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
if (locked)
ttm_bo_unreserve(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -1284,7 +1251,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
kref_init(&bo->kref);
- kref_init(&bo->list_kref);
INIT_LIST_HEAD(&bo->lru);
INIT_LIST_HEAD(&bo->ddestroy);
INIT_LIST_HEAD(&bo->swap);
@@ -1804,11 +1770,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL)) {
- ret = 0;
- break;
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+ NULL))
+ continue;
+
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ continue;
}
+
+ ret = 0;
+ break;
}
if (!ret)
break;
@@ -1819,11 +1792,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
return ret;
}
- kref_get(&bo->list_kref);
-
- if (!list_empty(&bo->ddestroy)) {
+ if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, false, false, locked);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -1877,7 +1848,7 @@ out:
*/
if (locked)
dma_resv_unlock(bo->base.resv);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
EXPORT_SYMBOL(ttm_bo_swapout);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 86d152472f38..c8e359ded1df 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
fbo->base.moving = NULL;
drm_vma_node_reset(&fbo->base.base.vma_node);
- kref_init(&fbo->base.list_kref);
kref_init(&fbo->base.kref);
fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 66ca49db9633..b9bc1b00142e 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -135,18 +135,14 @@ struct ttm_tt;
* @num_pages: Actual number of pages.
* @acc_size: Accounted size for this object.
* @kref: Reference count of this buffer object. When this refcount reaches
- * zero, the object is put on the delayed delete list.
- * @list_kref: List reference count of this buffer object. This member is
- * used to avoid destruction while the buffer object is still on a list.
- * Lru lists may keep one refcount, the delayed delete list, and kref != 0
- * keeps one refcount. When this refcount reaches zero,
- * the object is destroyed.
+ * zero, the object is destroyed or put on the delayed delete list.
* @mem: structure describing current placement.
* @persistent_swap_storage: Usually the swap storage is deleted for buffers
* pinned in physical memory. If this behaviour is not desired, this member
* holds a pointer to a persistent shmem object.
* @ttm: TTM structure holding system pages.
* @evicted: Whether the object was evicted without user-space knowing.
+ * @deleted: True if the object is only a zombie and already deleted.
* @lru: List head for the lru list.
* @ddestroy: List head for the delayed destroy list.
* @swap: List head for swap LRU list.
@@ -183,9 +179,7 @@ struct ttm_buffer_object {
/**
* Members not needing protection.
*/
-
struct kref kref;
- struct kref list_kref;
/**
* Members protected by the bo::resv::reserved lock.
@@ -195,6 +189,7 @@ struct ttm_buffer_object {
struct file *persistent_swap_storage;
struct ttm_tt *ttm;
bool evicted;
+ bool deleted;
/**
* Members protected by the bdev::lru_lock.