From 403797925768d9fa870f5b1ebcd20016b397083b Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 22 Jun 2021 12:23:39 -0400 Subject: drm/ttm: Fix multihop assert on eviction. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: Under memory pressure when GTT domain is almost full multihop assert will come up when trying to evict LRU BO from VRAM to SYSTEM. Fix: Don't assert on multihop error in evict code but rather do a retry as we do in ttm_bo_move_buffer Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210622162339.761651-6-andrey.grodzovsky@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 63 ++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index db53fecca696..6c78149360c4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -485,6 +485,31 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_device *bdev, int resched) } EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue); +static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, + struct ttm_resource **mem, + struct ttm_operation_ctx *ctx, + struct ttm_place *hop) +{ + struct ttm_placement hop_placement; + struct ttm_resource *hop_mem; + int ret; + + hop_placement.num_placement = hop_placement.num_busy_placement = 1; + hop_placement.placement = hop_placement.busy_placement = hop; + + /* find space in the bounce domain */ + ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx); + if (ret) + return ret; + /* move to the bounce domain */ + ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL); + if (ret) { + ttm_resource_free(bo, &hop_mem); + return ret; + } + return 0; +} + static int ttm_bo_evict(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) { @@ -524,12 +549,17 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, goto out; } +bounce: ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop); - if (unlikely(ret)) { - WARN(ret == -EMULTIHOP, "Unexpected multihop in eviction - likely driver bug\n"); - if (ret != -ERESTARTSYS) + if (ret == -EMULTIHOP) { + ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop); + if (ret) { pr_err("Buffer eviction failed\n"); - ttm_resource_free(bo, &evict_mem); + ttm_resource_free(bo, &evict_mem); + goto out; + } + /* try and move to final place now. */ + goto bounce; } out: return ret; @@ -844,31 +874,6 @@ error: } EXPORT_SYMBOL(ttm_bo_mem_space); -static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, - struct ttm_resource **mem, - struct ttm_operation_ctx *ctx, - struct ttm_place *hop) -{ - struct ttm_placement hop_placement; - struct ttm_resource *hop_mem; - int ret; - - hop_placement.num_placement = hop_placement.num_busy_placement = 1; - hop_placement.placement = hop_placement.busy_placement = hop; - - /* find space in the bounce domain */ - ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx); - if (ret) - return ret; - /* move to the bounce domain */ - ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL); - if (ret) { - ttm_resource_free(bo, &hop_mem); - return ret; - } - return 0; -} - static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, struct ttm_placement *placement, struct ttm_operation_ctx *ctx) -- cgit v1.2.3 From ae1bef72c20f9231898e2f5595751a2635d49db8 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 22 Jun 2021 12:23:34 -0400 Subject: drm/ttm: add TTM_PL_FLAG_TEMPORARY flag v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes drivers need to use bounce buffers to evict BOs. While those reside in some domain they are not necessarily suitable for CS. Add a flag so that drivers can note that a bounce buffers needs to be reallocated during validation. v2: add detailed comments v3 (chk): merge commits and rework commit message Suggested-by: Christian König Signed-off-by: Lang Yu Signed-off-by: Christian König Signed-off-by: Andrey Grodzovsky Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20210622162339.761651-1-andrey.grodzovsky@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ include/drm/ttm/ttm_placement.h | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6c78149360c4..5a2dc712c632 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -918,6 +918,9 @@ static bool ttm_bo_places_compat(const struct ttm_place *places, { unsigned i; + if (mem->placement & TTM_PL_FLAG_TEMPORARY) + return false; + for (i = 0; i < num_placement; i++) { const struct ttm_place *heap = &places[i]; diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h index aa6ba4d0cf78..8995c9e4ec1b 100644 --- a/include/drm/ttm/ttm_placement.h +++ b/include/drm/ttm/ttm_placement.h @@ -47,8 +47,11 @@ * top of the memory area, instead of the bottom. */ -#define TTM_PL_FLAG_CONTIGUOUS (1 << 19) -#define TTM_PL_FLAG_TOPDOWN (1 << 22) +#define TTM_PL_FLAG_CONTIGUOUS (1 << 0) +#define TTM_PL_FLAG_TOPDOWN (1 << 1) + +/* For multihop handling */ +#define TTM_PL_FLAG_TEMPORARY (1 << 2) /** * struct ttm_place -- cgit v1.2.3 From 9e5c772954406829e928dbe59891d08938ead04b Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Wed, 14 Jul 2021 14:54:19 +0000 Subject: drm/ttm: add a check against null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling ttm_range_man_fini(), 'man' may be uninitialized, which may cause a null pointer dereference bug. Fix this by checking if it is a null pointer. This log reveals it: [ 7.902580 ] BUG: kernel NULL pointer dereference, address: 0000000000000058 [ 7.905721 ] RIP: 0010:ttm_range_man_fini+0x40/0x160 [ 7.911826 ] Call Trace: [ 7.911826 ] radeon_ttm_fini+0x167/0x210 [ 7.911826 ] radeon_bo_fini+0x15/0x40 [ 7.913767 ] rs400_fini+0x55/0x80 [ 7.914358 ] radeon_device_fini+0x3c/0x140 [ 7.914358 ] radeon_driver_unload_kms+0x5c/0xe0 [ 7.914358 ] radeon_driver_load_kms+0x13a/0x200 [ 7.914358 ] ? radeon_driver_unload_kms+0xe0/0xe0 [ 7.914358 ] drm_dev_register+0x1db/0x290 [ 7.914358 ] radeon_pci_probe+0x16a/0x230 [ 7.914358 ] local_pci_probe+0x4a/0xb0 Signed-off-by: Zheyu Ma Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/1626274459-8148-1-git-send-email-zheyuma97@gmail.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_range_manager.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index 03395386e8a7..f4b08a8705b3 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -181,6 +181,9 @@ int ttm_range_man_fini(struct ttm_device *bdev, struct drm_mm *mm = &rman->mm; int ret; + if (!man) + return 0; + ttm_resource_manager_set_used(man, false); ret = ttm_resource_manager_evict_all(bdev, man); -- cgit v1.2.3 From 235c3610d5f02ee91244239b43cd9ae8b4859dff Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Jul 2021 13:13:55 -0500 Subject: drm/ttm: Force re-init if ttm_global_init() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have a failure, decrement the reference count so that the next call to ttm_global_init() will actually do something instead of assume everything is all set up. Signed-off-by: Jason Ekstrand Fixes: 62b53b37e4b1 ("drm/ttm: use a static ttm_bo_global instance") Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210720181357.2760720-5-jason@jlekstrand.net Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 5f31acec3ad7..519deea8e39b 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -100,6 +100,8 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret) + --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); return ret; } -- cgit v1.2.3 From 2dbd9c27eda5cf83aa990266a3355960d860da71 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 8 Jul 2021 14:25:18 +0300 Subject: drm/ttm: add missing NULL checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My local syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found out possible scenario: drm_gem_vram_create() <-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* Added NULL check inside ttm_mem_io_free() to prevent reported GPF and make this function NULL save in future. Same problem was in ttm_bo_move_to_lru_tail() as Christian reported. ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer can be NULL as well as in ttm_mem_io_free(). Fail log: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] ... RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66 .. Call Trace: ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422 kref_put include/linux/kref.h:65 [inline] ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline] ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228 Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer") Signed-off-by: Pavel Skripkin Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210708112518.17271-1-paskripkin@gmail.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b..8d7fd65ccced 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, return; } + if (!mem) + return; + man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..763fa6f4e07d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev, void ttm_mem_io_free(struct ttm_device *bdev, struct ttm_resource *mem) { + if (!mem) + return; + if (!mem->bus.offset && !mem->bus.addr) return; -- cgit v1.2.3 From 69de4421bb4c103ef42a32bafc596e23918c106f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:57 -0500 Subject: drm/ttm: Initialize debugfs from ttm_global_init() We create a bunch of debugfs entries as a side-effect of ttm_global_init() and then never clean them up. This isn't usually a problem because we free the whole debugfs directory on module unload. However, if the global reference count ever goes to zero and then ttm_global_init() is called again, we'll re-create those debugfs entries and debugfs will complain in dmesg that we're creating entries that already exist. This patch fixes this problem by changing the lifetime of the whole TTM debugfs directory to match that of the TTM global state. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-6-jason@jlekstrand.net --- drivers/gpu/drm/ttm/ttm_device.c | 12 ++++++++++++ drivers/gpu/drm/ttm/ttm_module.c | 16 ---------------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 519deea8e39b..74e3b460132b 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count; struct ttm_global ttm_glob; EXPORT_SYMBOL(ttm_glob); +struct dentry *ttm_debugfs_root; + static void ttm_global_release(void) { struct ttm_global *glob = &ttm_glob; @@ -53,6 +55,7 @@ static void ttm_global_release(void) goto out; ttm_pool_mgr_fini(); + debugfs_remove(ttm_debugfs_root); __free_page(glob->dummy_read_page); memset(glob, 0, sizeof(*glob)); @@ -73,6 +76,13 @@ static int ttm_global_init(void) si_meminfo(&si); + ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + if (IS_ERR(ttm_debugfs_root)) { + ret = PTR_ERR(ttm_debugfs_root); + ttm_debugfs_root = NULL; + goto out; + } + /* Limit the number of pages in the pool to about 50% of the total * system memory. */ @@ -100,6 +110,8 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret && ttm_debugfs_root) + debugfs_remove(ttm_debugfs_root); if (ret) --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 997c458f68a9..7fcdef278c74 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) return tmp; } -struct dentry *ttm_debugfs_root; - -static int __init ttm_init(void) -{ - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); - return 0; -} - -static void __exit ttm_exit(void) -{ - debugfs_remove(ttm_debugfs_root); -} - -module_init(ttm_init); -module_exit(ttm_exit); - MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse"); MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)"); MODULE_LICENSE("GPL and additional rights"); -- cgit v1.2.3 From 958f44255058338f4b370d8e4100e1e7d72db0cc Mon Sep 17 00:00:00 2001 From: Dan Moulding Date: Tue, 10 Aug 2021 13:59:06 -0600 Subject: drm: ttm: Don't bail from ttm_global_init if debugfs_create_dir fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 69de4421bb4c ("drm/ttm: Initialize debugfs from ttm_global_init()"), ttm_global_init was changed so that if creation of the debugfs global root directory fails, ttm_global_init will bail out early and return an error, leading to initialization failure of DRM drivers. However, not every system will be using debugfs. On such a system, debugfs directory creation can be expected to fail, but DRM drivers must still be usable. This changes it so that if creation of TTM's debugfs root directory fails, then no biggie: keep calm and carry on. Fixes: 69de4421bb4c ("drm/ttm: Initialize debugfs from ttm_global_init()") Signed-off-by: Dan Moulding Tested-by: Huacai Chen Reviewed-by: Huang Rui Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210810195906.22220-2-dmoulding@me.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 74e3b460132b..2df59b3c2ea1 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -78,9 +78,7 @@ static int ttm_global_init(void) ttm_debugfs_root = debugfs_create_dir("ttm", NULL); if (IS_ERR(ttm_debugfs_root)) { - ret = PTR_ERR(ttm_debugfs_root); ttm_debugfs_root = NULL; - goto out; } /* Limit the number of pages in the pool to about 50% of the total -- cgit v1.2.3 From 80cbd8808f85017b8aff4b223db68926b470be12 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 12 Aug 2021 15:34:43 -0500 Subject: drm/ttm: Include pagemap.h from ttm_tt.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's needed for pgprot_t which is used in the header. Signed-off-by: Jason Ekstrand Cc: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210812203443.1725307-2-jason@jlekstrand.net Reviewed-by: Christian König Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_tt.c | 1 - include/drm/ttm/ttm_tt.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 24031a8acd2d..d5cd8b5dc0bf 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -32,7 +32,6 @@ #define pr_fmt(fmt) "[TTM] " fmt #include -#include #include #include #include diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 0d97967bf955..b20e89d321b0 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -27,6 +27,7 @@ #ifndef _TTM_TT_H_ #define _TTM_TT_H_ +#include #include #include #include -- cgit v1.2.3 From efcefc7127290e7e9fa98dea029163ad8eda8fb3 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 31 Aug 2021 09:15:36 +0200 Subject: drm/ttm: Fix ttm_bo_move_memcpy() for subclassed struct ttm_resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code was making a copy of a struct ttm_resource. However, recently the struct ttm_resources were allowed to be subclassed and also were allowed to be malloced, hence the driver could end up assuming the copy we handed it was subclassed and worse, the original could have been freed at this point. Fix this by using the original struct ttm_resource before it is potentially freed in ttm_bo_move_sync_cleanup() v2: Base on drm-misc-next-fixes rather than drm-tip. Reported-by: Ben Skeggs Reported-by: Dave Airlie Cc: Christian König Cc: Fixes: 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem") Signed-off-by: Thomas Hellström Reviewed-by: Christian König Reviewed-by: Ben Skeggs Link: https://patchwork.freedesktop.org/patch/msgid/20210831071536.80636-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo_util.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 763fa6f4e07d..1c5ffe2935af 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -143,7 +143,6 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_resource *src_mem = bo->resource; struct ttm_resource_manager *src_man = ttm_manager_type(bdev, src_mem->mem_type); - struct ttm_resource src_copy = *src_mem; union { struct ttm_kmap_iter_tt tt; struct ttm_kmap_iter_linear_io io; @@ -173,11 +172,11 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, } ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); - src_copy = *src_mem; - ttm_bo_move_sync_cleanup(bo, dst_mem); if (!src_iter->ops->maps_tt) - ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy); + ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem); + ttm_bo_move_sync_cleanup(bo, dst_mem); + out_src_iter: if (!dst_iter->ops->maps_tt) ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem); -- cgit v1.2.3 From 70982eef4d7eebb47a3b1ef25ec1bc742f3a21cf Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 7 Sep 2021 12:08:32 +0800 Subject: drm/ttm: Fix a deadlock if the target BO is not idle during swap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ret value might be -EBUSY, caller will think lru lock is still locked but actually NOT. So return -ENOSPC instead. Otherwise we hit list corruption. ttm_bo_cleanup_refs might fail too if BO is not idle. If we return 0, caller(ttm_tt_populate -> ttm_global_swapout ->ttm_device_swapout) will be stuck as we actually did not free any BO memory. This usually happens when the fence is not signaled for a long time. Signed-off-by: xinhui pan Reviewed-by: Christian König Fixes: ebd59851c796 ("drm/ttm: move swapout logic around v3") Link: https://patchwork.freedesktop.org/patch/msgid/20210907040832.1107747-1-xinhui.pan@amd.com Signed-off-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index ea4add2b9717..bb9e02c31946 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1160,9 +1160,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, } if (bo->deleted) { - ttm_bo_cleanup_refs(bo, false, false, locked); + ret = ttm_bo_cleanup_refs(bo, false, false, locked); ttm_bo_put(bo); - return 0; + return ret == -EBUSY ? -ENOSPC : ret; } ttm_bo_del_from_lru(bo); @@ -1216,7 +1216,7 @@ out: if (locked) dma_resv_unlock(bo->base.resv); ttm_bo_put(bo); - return ret; + return ret == -EBUSY ? -ENOSPC : ret; } void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) -- cgit v1.2.3