From d3116756a710e3cd51293a9d58b525957ab7e784 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 12 Apr 2021 15:11:47 +0200 Subject: drm/ttm: rename bo->mem and make it a pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we want to decouble resource management from buffer management we need to be able to handle resources separately. Add a resource pointer and rename bo->mem so that all code needs to change to access the pointer instead. No functional change. Signed-off-by: Christian König Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210430092508.60710-4-christian.koenig@amd.com --- include/drm/ttm/ttm_bo_api.h | 3 ++- include/drm/ttm/ttm_bo_driver.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index f2a5f37c61b7..291a339a7e08 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -136,7 +136,8 @@ struct ttm_buffer_object { * Members protected by the bo::resv::reserved lock. */ - struct ttm_resource mem; + struct ttm_resource *resource; + struct ttm_resource _mem; struct ttm_tt *ttm; bool deleted; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index dbccac957f8f..1a9ba0b13622 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -181,14 +181,14 @@ static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) { spin_lock(&bo->bdev->lru_lock); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); spin_unlock(&bo->bdev->lru_lock); } static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, struct ttm_resource *new_mem) { - bo->mem = *new_mem; + bo->_mem = *new_mem; new_mem->mm_node = NULL; } @@ -202,7 +202,7 @@ static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, static inline void ttm_bo_move_null(struct ttm_buffer_object *bo, struct ttm_resource *new_mem) { - struct ttm_resource *old_mem = &bo->mem; + struct ttm_resource *old_mem = bo->resource; WARN_ON(old_mem->mm_node != NULL); ttm_bo_assign_mem(bo, new_mem); -- cgit v1.2.3 From 95b2151fec3e62ba0033c61bd388ff0111884972 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 28 May 2021 19:51:52 -0400 Subject: drm/sched: Fix inverted comment for hang_limit The hang_limit is the threshold after which the kernel no longer attempts to schedule a job. Its documentation stated the opposite due to a typo. Correct the wording to indicate the actual purpose of the field. Signed-off-by: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210528235152.38447-1-alyssa.rosenzweig@collabora.com --- include/drm/gpu_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 10225a0a35d0..d18af49fd009 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -275,7 +275,7 @@ struct drm_sched_backend_ops { * @pending_list: the list of jobs which are currently in the job queue. * @job_list_lock: lock to protect the pending_list. * @hang_limit: once the hangs by a job crosses this limit then it is marked - * guilty and it will be considered for scheduling further. + * guilty and it will no longer be considered for scheduling. 
* @score: score to help loadbalancer pick a idle sched * @_score: score used when the driver doesn't provide one * @ready: marks if the underlying HW is ready to work -- cgit v1.2.3 From 0c6b522abc2a592468992780babd3c3629c7ceac Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 6 May 2021 14:16:01 +0200 Subject: dma-buf: cleanup dma-resv shared fence debugging a bit v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make that a function instead of inline. v2: improve the kerneldoc wording as suggested by Daniel Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210602111714.212426-3-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 20 ++++++++++++++++++++ include/linux/dma-resv.h | 15 +++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 87f5d82d992a..3964df438505 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -208,6 +208,26 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) } EXPORT_SYMBOL(dma_resv_reserve_shared); +#ifdef CONFIG_DEBUG_MUTEXES +/** + * dma_resv_reset_shared_max - reset shared fences for debugging + * @obj: the dma_resv object to reset + * + * Reset the number of pre-reserved shared slots to test that drivers do + * correct slot allocation using dma_resv_reserve_shared(). See also + * &dma_resv_list.shared_max. + */ +void dma_resv_reset_shared_max(struct dma_resv *obj) +{ + /* Test shared fence slot reservation */ + if (rcu_access_pointer(obj->fence)) { + struct dma_resv_list *fence = dma_resv_get_list(obj); + + fence->shared_max = fence->shared_count; + } +} +#endif + /** * dma_resv_add_shared_fence - Add a fence to a shared slot * @obj: the reservation object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index d44a77e8a7e3..f32a3d176513 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -92,6 +92,12 @@ static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj) dma_resv_held(obj)); } +#ifdef CONFIG_DEBUG_MUTEXES +void dma_resv_reset_shared_max(struct dma_resv *obj); +#else +static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {} +#endif + /** * dma_resv_lock - lock the reservation object * @obj: the reservation object @@ -215,14 +221,7 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj) */ static inline void dma_resv_unlock(struct dma_resv *obj) { -#ifdef CONFIG_DEBUG_MUTEXES - /* Test shared fence slot reservation */ - if (rcu_access_pointer(obj->fence)) { - struct dma_resv_list *fence = dma_resv_get_list(obj); - - fence->shared_max = fence->shared_count; - } -#endif + dma_resv_reset_shared_max(obj); ww_mutex_unlock(&obj->lock); } -- cgit v1.2.3 From bfa3357ef9abc9d56a2910222d2deeb9f15c91ff Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 15 Apr 2021 09:52:58 +0200 Subject: drm/ttm: allocate resource object instead of embedding it v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To improve the handling we want the establish the resource object as base class for the backend allocations. 
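As an illustration of the change described above (allocating the resource object and handing it around by pointer instead of embedding it), here is a minimal standalone C sketch of the alloc/free-through-double-pointer ownership pattern that the diff below introduces. The names used here (struct resource, resource_alloc, resource_free) are simplified stand-ins for this note only, not the actual TTM symbols; the real versions are ttm_resource_alloc()/ttm_resource_free() in the hunks that follow.

/*
 * Simplified userspace sketch of the ownership pattern: the allocator
 * returns a freshly allocated object through a double pointer, and the
 * free path releases it and clears the caller's pointer so no stale
 * embedded copy can survive.  Stand-in types only, not kernel code.
 */
#include <stdlib.h>
#include <stdio.h>

struct resource {
	unsigned long start;
	unsigned long num_pages;
};

/* hand ownership back through @res, as ttm_resource_alloc() now does */
static int resource_alloc(unsigned long num_pages, struct resource **res)
{
	struct resource *r = malloc(sizeof(*r));

	if (!r)
		return -1;
	r->start = 0;
	r->num_pages = num_pages;
	*res = r;
	return 0;
}

/* free and NULL the caller's pointer, mirroring ttm_resource_free() */
static void resource_free(struct resource **res)
{
	if (!*res)
		return;
	free(*res);
	*res = NULL;
}

int main(void)
{
	struct resource *res = NULL;

	if (resource_alloc(16, &res))
		return 1;
	printf("allocated %lu pages\n", res->num_pages);
	resource_free(&res);	/* res is NULL afterwards */
	return res == NULL ? 0 : 1;
}

Passing the double pointer everywhere is what lets the later patches in this series replace the embedded struct with driver-allocated objects without changing the callers again.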
v2: add missing error handling Signed-off-by: Christian König Acked-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210602100914.46246-1-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 54 +++++++++---------- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 83 ++++++++++-------------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 43 ++++++++-------- drivers/gpu/drm/ttm/ttm_resource.c | 31 ++++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 2 +- include/drm/ttm/ttm_bo_api.h | 1 - include/drm/ttm/ttm_bo_driver.h | 10 ++-- include/drm/ttm/ttm_resource.h | 4 +- 11 files changed, 110 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 03c6b63d1d54..59723c3d5826 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -362,14 +362,14 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, if (cpu_addr) amdgpu_bo_kunmap(*bo_ptr); - ttm_resource_free(&(*bo_ptr)->tbo, (*bo_ptr)->tbo.resource); + ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource); for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) { (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT; (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; } r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement, - (*bo_ptr)->tbo.resource, &ctx); + &(*bo_ptr)->tbo.resource, &ctx); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 663aa7d2e2ea..69db89261650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -491,7 +491,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); goto out; } @@ -950,9 +950,9 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - struct ttm_resource tmp; struct ttm_placement placement; struct ttm_place placements; + struct ttm_resource *tmp; uint64_t addr, flags; int r; @@ -962,37 +962,37 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) addr = amdgpu_gmc_agp_addr(bo); if (addr != AMDGPU_BO_INVALID_OFFSET) { bo->resource->start = addr >> PAGE_SHIFT; - } else { + return 0; + } - /* allocate GART space */ - placement.num_placement = 1; - placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; - placements.fpfn = 0; - placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; - placements.mem_type = TTM_PL_TT; - placements.flags = bo->resource->placement; - - r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); - if (unlikely(r)) - return r; + /* allocate GART space */ + placement.num_placement = 1; + placement.placement = &placements; + placement.num_busy_placement = 1; + placement.busy_placement = &placements; + placements.fpfn = 0; + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; + placements.mem_type = TTM_PL_TT; + placements.flags = bo->resource->placement; - /* compute PTE flags for this buffer object */ - flags = amdgpu_ttm_tt_pte_flags(adev, 
bo->ttm, &tmp); + r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); + if (unlikely(r)) + return r; - /* Bind pages */ - gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_ttm_gart_bind(adev, bo, flags); - if (unlikely(r)) { - ttm_resource_free(bo, &tmp); - return r; - } + /* compute PTE flags for this buffer object */ + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp); - ttm_resource_free(bo, bo->resource); - ttm_bo_assign_mem(bo, &tmp); + /* Bind pages */ + gtt->offset = (u64)tmp->start << PAGE_SHIFT; + r = amdgpu_ttm_gart_bind(adev, bo, flags); + if (unlikely(r)) { + ttm_resource_free(bo, &tmp); + return r; } + ttm_resource_free(bo, &bo->resource); + ttm_bo_assign_mem(bo, tmp); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index e688ca77483d..3a0d9b3bf991 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1009,7 +1009,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_reg->mem_type == TTM_PL_TT && new_reg->mem_type == TTM_PL_SYSTEM) { nouveau_ttm_tt_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_reg); goto out; } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 2507c1741681..cdffa9b65108 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -229,7 +229,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == TTM_PL_TT && new_mem->mem_type == TTM_PL_SYSTEM) { radeon_ttm_tt_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); goto out; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 5a7ab4b35b2d..4ed56520b81d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,7 +223,7 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) bo->bdev->funcs->delete_mem_notify(bo); ttm_bo_tt_destroy(bo); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); } static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) @@ -489,7 +489,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) { struct ttm_device *bdev = bo->bdev; - struct ttm_resource evict_mem; + struct ttm_resource *evict_mem; struct ttm_placement placement; struct ttm_place hop; int ret = 0; @@ -519,7 +519,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, goto out; } - ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, ctx, &hop); + ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop); if (unlikely(ret)) { WARN(ret == -EMULTIHOP, "Unexpected multihop in eviction - likely driver bug\n"); if (ret != -ERESTARTSYS) @@ -728,14 +728,15 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, */ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *mem, + struct ttm_resource **mem, struct ttm_operation_ctx *ctx) { struct ttm_device *bdev = bo->bdev; - struct ttm_resource_manager *man = ttm_manager_type(bdev, mem->mem_type); + struct ttm_resource_manager *man; struct ww_acquire_ctx *ticket; int ret; + man = ttm_manager_type(bdev, (*mem)->mem_type); ticket = dma_resv_locking_ctx(bo->base.resv); do { ret = ttm_resource_alloc(bo, place, mem); @@ -749,37 +750,7 @@ static int 
ttm_bo_mem_force_space(struct ttm_buffer_object *bo, return ret; } while (1); - return ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu); -} - -/** - * ttm_bo_mem_placement - check if placement is compatible - * @bo: BO to find memory for - * @place: where to search - * @mem: the memory object to fill in - * - * Check if placement is compatible and fill in mem structure. - * Returns -EBUSY if placement won't work or negative error code. - * 0 when placement can be used. - */ -static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, - const struct ttm_place *place, - struct ttm_resource *mem) -{ - struct ttm_device *bdev = bo->bdev; - struct ttm_resource_manager *man; - - man = ttm_manager_type(bdev, place->mem_type); - if (!man || !ttm_resource_manager_used(man)) - return -EBUSY; - - mem->mem_type = place->mem_type; - mem->placement = place->flags; - - spin_lock(&bo->bdev->lru_lock); - ttm_bo_move_to_lru_tail(bo, mem, NULL); - spin_unlock(&bo->bdev->lru_lock); - return 0; + return ttm_bo_add_move_fence(bo, man, *mem, ctx->no_wait_gpu); } /* @@ -792,7 +763,7 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, */ int ttm_bo_mem_space(struct ttm_buffer_object *bo, struct ttm_placement *placement, - struct ttm_resource *mem, + struct ttm_resource **mem, struct ttm_operation_ctx *ctx) { struct ttm_device *bdev = bo->bdev; @@ -807,8 +778,8 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, const struct ttm_place *place = &placement->placement[i]; struct ttm_resource_manager *man; - ret = ttm_bo_mem_placement(bo, place, mem); - if (ret) + man = ttm_manager_type(bdev, place->mem_type); + if (!man || !ttm_resource_manager_used(man)) continue; type_found = true; @@ -818,8 +789,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (unlikely(ret)) goto error; - man = ttm_manager_type(bdev, mem->mem_type); - ret = ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu); + ret = ttm_bo_add_move_fence(bo, man, *mem, ctx->no_wait_gpu); if (unlikely(ret)) { ttm_resource_free(bo, mem); if (ret == -EBUSY) @@ -832,9 +802,10 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, for (i = 0; i < placement->num_busy_placement; ++i) { const struct ttm_place *place = &placement->busy_placement[i]; + struct ttm_resource_manager *man; - ret = ttm_bo_mem_placement(bo, place, mem); - if (ret) + man = ttm_manager_type(bdev, place->mem_type); + if (!man || !ttm_resource_manager_used(man)) continue; type_found = true; @@ -861,12 +832,12 @@ error: EXPORT_SYMBOL(ttm_bo_mem_space); static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, - struct ttm_resource *mem, + struct ttm_resource **mem, struct ttm_operation_ctx *ctx, struct ttm_place *hop) { struct ttm_placement hop_placement; - struct ttm_resource hop_mem; + struct ttm_resource *hop_mem; int ret; hop_placement.num_placement = hop_placement.num_busy_placement = 1; @@ -877,7 +848,7 @@ static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, if (ret) return ret; /* move to the bounce domain */ - ret = ttm_bo_handle_move_mem(bo, &hop_mem, false, ctx, NULL); + ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL); if (ret) { ttm_resource_free(bo, &hop_mem); return ret; @@ -889,14 +860,12 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, struct ttm_placement *placement, struct ttm_operation_ctx *ctx) { + struct ttm_resource *mem; struct ttm_place hop; - struct ttm_resource mem; int ret; dma_resv_assert_held(bo->base.resv); - memset(&hop, 0, sizeof(hop)); - /* * Determine where to move the buffer. 
* @@ -910,7 +879,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, if (ret) return ret; bounce: - ret = ttm_bo_handle_move_mem(bo, &mem, false, ctx, &hop); + ret = ttm_bo_handle_move_mem(bo, mem, false, ctx, &hop); if (ret == -EMULTIHOP) { ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx, &hop); if (ret) @@ -1019,7 +988,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, { static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM }; bool locked; - int ret = 0; + int ret; bo->destroy = destroy ? destroy : ttm_bo_default_destroy; @@ -1029,8 +998,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, bo->bdev = bdev; bo->type = type; bo->page_alignment = page_alignment; - bo->resource = &bo->_mem; - ttm_resource_alloc(bo, &sys_mem, bo->resource); bo->moving = NULL; bo->pin_count = 0; bo->sg = sg; @@ -1042,6 +1009,12 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, } atomic_inc(&ttm_glob.bo_count); + ret = ttm_resource_alloc(bo, &sys_mem, &bo->resource); + if (unlikely(ret)) { + ttm_bo_put(bo); + return ret; + } + /* * For ttm_bo_type_device buffers, allocate * address space from the device. @@ -1170,7 +1143,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, */ if (bo->resource->mem_type != TTM_PL_SYSTEM) { struct ttm_operation_ctx ctx = { false, false }; - struct ttm_resource evict_mem; + struct ttm_resource *evict_mem; struct ttm_place place, hop; memset(&place, 0, sizeof(place)); @@ -1182,7 +1155,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, if (unlikely(ret)) goto out; - ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, &ctx, &hop); + ret = ttm_bo_handle_move_mem(bo, evict_mem, true, &ctx, &hop); if (unlikely(ret != 0)) { WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput - likely driver bug.\n"); goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index aedf02a31c70..1b326e70cb02 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -176,16 +176,17 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, struct ttm_resource *new_mem) { + struct ttm_resource *old_mem = bo->resource; struct ttm_device *bdev = bo->bdev; - struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); + struct ttm_resource_manager *man; struct ttm_tt *ttm = bo->ttm; - struct ttm_resource *old_mem = bo->resource; - struct ttm_resource old_copy = *old_mem; void *old_iomap; void *new_iomap; int ret; unsigned long i; + man = ttm_manager_type(bdev, new_mem->mem_type); + ret = ttm_bo_wait_ctx(bo, ctx); if (ret) return ret; @@ -201,7 +202,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, * Single TTM move. NOP. */ if (old_iomap == NULL && new_iomap == NULL) - goto out2; + goto out1; /* * Don't move nonexistent data. Clear destination instead. 
@@ -210,7 +211,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, (ttm == NULL || (!ttm_tt_is_populated(ttm) && !(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)))) { memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE); - goto out2; + goto out1; } /* @@ -235,27 +236,25 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, ret = ttm_copy_io_page(new_iomap, old_iomap, i); } if (ret) - goto out1; + break; } mb(); -out2: - old_copy = *old_mem; +out1: + ttm_resource_iounmap(bdev, new_mem, new_iomap); +out: + ttm_resource_iounmap(bdev, old_mem, old_iomap); + + if (ret) { + ttm_resource_free(bo, &new_mem); + return ret; + } + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); if (!man->use_tt) ttm_bo_tt_destroy(bo); -out1: - ttm_resource_iounmap(bdev, old_mem, new_iomap); -out: - ttm_resource_iounmap(bdev, &old_copy, old_iomap); - - /* - * On error, keep the mm node! - */ - if (!ret) - ttm_resource_free(bo, &old_copy); return ret; } EXPORT_SYMBOL(ttm_bo_move_memcpy); @@ -566,7 +565,7 @@ static int ttm_bo_wait_free_node(struct ttm_buffer_object *bo, if (!dst_use_tt) ttm_bo_tt_destroy(bo); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); return 0; } @@ -629,7 +628,7 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo, } spin_unlock(&from->move_lock); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); dma_fence_put(bo->moving); bo->moving = dma_fence_get(fence); @@ -678,11 +677,11 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo) if (ret) ttm_bo_wait(bo, false, false); - ttm_resource_alloc(bo, &sys_mem, bo->resource); + ret = ttm_resource_alloc(bo, &sys_mem, &bo->resource); bo->ttm = NULL; dma_resv_unlock(&ghost->base._resv); ttm_bo_put(ghost); - return 0; + return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 59e2b7157e41..65451e1bc303 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -27,10 +27,16 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *res) + struct ttm_resource **res_ptr) { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, place->mem_type); + struct ttm_resource *res; + int r; + + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; res->mm_node = NULL; res->start = 0; @@ -41,18 +47,27 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, res->bus.offset = 0; res->bus.is_iomem = false; res->bus.caching = ttm_cached; + r = man->func->alloc(man, bo, place, res); + if (r) { + kfree(res); + return r; + } - return man->func->alloc(man, bo, place, res); + *res_ptr = res; + return 0; } -void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource *res) +void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) { - struct ttm_resource_manager *man = - ttm_manager_type(bo->bdev, res->mem_type); + struct ttm_resource_manager *man; - man->func->free(man, res); - res->mm_node = NULL; - res->mem_type = TTM_PL_SYSTEM; + if (!*res) + return; + + man = ttm_manager_type(bo->bdev, (*res)->mem_type); + man->func->free(man, *res); + kfree(*res); + *res = NULL; } EXPORT_SYMBOL(ttm_resource_free); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index ed8563ef9a3b..bfcf31bf7e37 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -741,7 +741,7 @@ static int vmw_move(struct 
ttm_buffer_object *bo, goto fail; vmw_ttm_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, bo->resource); + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); return 0; } else { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 291a339a7e08..f681bbdbc698 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -137,7 +137,6 @@ struct ttm_buffer_object { */ struct ttm_resource *resource; - struct ttm_resource _mem; struct ttm_tt *ttm; bool deleted; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1a9ba0b13622..ead0ef7136c8 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -96,7 +96,7 @@ struct ttm_lru_bulk_move { */ int ttm_bo_mem_space(struct ttm_buffer_object *bo, struct ttm_placement *placement, - struct ttm_resource *mem, + struct ttm_resource **mem, struct ttm_operation_ctx *ctx); /** @@ -188,8 +188,8 @@ ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, struct ttm_resource *new_mem) { - bo->_mem = *new_mem; - new_mem->mm_node = NULL; + WARN_ON(bo->resource); + bo->resource = new_mem; } /** @@ -202,9 +202,7 @@ static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, static inline void ttm_bo_move_null(struct ttm_buffer_object *bo, struct ttm_resource *new_mem) { - struct ttm_resource *old_mem = bo->resource; - - WARN_ON(old_mem->mm_node != NULL); + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); } diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 890b9d369519..c17c1a52070d 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -225,8 +225,8 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man) int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *res); -void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource *res); + struct ttm_resource **res); +void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res); void ttm_resource_manager_init(struct ttm_resource_manager *man, unsigned long p_size); -- cgit v1.2.3 From 3eb7d96e94150304011d214750b45766cf62d9c9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Sat, 17 Apr 2021 18:48:36 +0200 Subject: drm/ttm: flip over the range manager to self allocated nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start with the range manager to make the resource object the base class for the allocated nodes. While at it cleanup a lot of the code around that. 
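For illustration, the node layout this patch moves to (see the ttm_range_mgr_node definition added further down) follows the usual kernel base-class idiom: embed the common struct as the first member of the manager's node and upcast with container_of(). Below is a rough standalone C sketch of that idiom; struct resource, struct mm_node and struct range_node are simplified stand-ins for this note, not the real TTM/DRM types.

/*
 * Standalone sketch of "embed the base struct, upcast with container_of".
 * Keeping the base first makes the upcast a zero-offset cast, and the
 * flexible array lets a manager append as many backing nodes as it needs.
 */
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct resource {		/* stand-in for struct ttm_resource */
	unsigned long start;
	unsigned long num_pages;
};

struct mm_node {		/* stand-in for struct drm_mm_node */
	unsigned long start;
	unsigned long size;
};

struct range_node {		/* stand-in for struct ttm_range_mgr_node */
	struct resource base;		/* base class comes first */
	struct mm_node mm_nodes[];	/* usually a single entry */
};

static struct range_node *to_range_node(struct resource *res)
{
	return container_of(res, struct range_node, base);
}

int main(void)
{
	/* struct_size()-style allocation: header plus one array entry */
	struct range_node *node = malloc(sizeof(*node) + sizeof(node->mm_nodes[0]));
	struct resource *res;

	if (!node)
		return 1;
	node->base.start = 0;
	node->base.num_pages = 4;
	node->mm_nodes[0].start = 0x1000;
	node->mm_nodes[0].size = 4;

	res = &node->base;	/* callers only ever see the base object */
	printf("backing node starts at 0x%lx\n", to_range_node(res)->mm_nodes[0].start);
	free(node);
	return 0;
}

The same shape is what later patches reuse for the amdgpu, nouveau and vmwgfx managers, which is why the range manager is converted first.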
Signed-off-by: Christian König Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210602100914.46246-2-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + drivers/gpu/drm/drm_gem_vram_helper.c | 2 ++ drivers/gpu/drm/nouveau/nouveau_ttm.c | 2 ++ drivers/gpu/drm/qxl/qxl_ttm.c | 1 + drivers/gpu/drm/radeon/radeon_ttm.c | 1 + drivers/gpu/drm/ttm/ttm_range_manager.c | 56 ++++++++++++++++++++++++--------- drivers/gpu/drm/ttm/ttm_resource.c | 26 +++++++++------ include/drm/ttm/ttm_bo_driver.h | 26 --------------- include/drm/ttm/ttm_range_manager.h | 43 +++++++++++++++++++++++++ include/drm/ttm/ttm_resource.h | 3 ++ 10 files changed, 111 insertions(+), 50 deletions(-) create mode 100644 include/drm/ttm/ttm_range_manager.h (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 69db89261650..df1f185faae9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -45,6 +45,7 @@ #include #include #include +#include #include diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 83e7258c7f90..17a4c5d47b6a 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -17,6 +17,8 @@ #include #include +#include + static const struct drm_gem_object_funcs drm_gem_vram_object_funcs; /** diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 65430912ff72..b08b8efeefba 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -26,6 +26,8 @@ #include #include +#include + #include "nouveau_drv.h" #include "nouveau_gem.h" #include "nouveau_mem.h" diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 8aa87b8edb9c..19fd39d9a00c 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "qxl_drv.h" #include "qxl_object.h" diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index cdffa9b65108..ad2a5a791bba 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "radeon_reg.h" #include "radeon.h" diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index b9d5da6e6a81..ce5d07ca384c 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -29,12 +29,13 @@ * Authors: Thomas Hellstrom */ -#include +#include #include +#include +#include #include #include #include -#include /* * Currently we use a spinlock for the lock, but a mutex *may* be @@ -60,8 +61,8 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, struct ttm_resource *mem) { struct ttm_range_manager *rman = to_range_manager(man); + struct ttm_range_mgr_node *node; struct drm_mm *mm = &rman->mm; - struct drm_mm_node *node; enum drm_mm_insert_mode mode; unsigned long lpfn; int ret; @@ -70,7 +71,7 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, if (!lpfn) lpfn = man->size; - node = kzalloc(sizeof(*node), GFP_KERNEL); + node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL); if (!node) return -ENOMEM; @@ -78,17 +79,19 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) mode = DRM_MM_INSERT_HIGH; + ttm_resource_init(bo, place, 
&node->base); + spin_lock(&rman->lock); - ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages, - bo->page_alignment, 0, + ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0], + mem->num_pages, bo->page_alignment, 0, place->fpfn, lpfn, mode); spin_unlock(&rman->lock); if (unlikely(ret)) { kfree(node); } else { - mem->mm_node = node; - mem->start = node->start; + mem->mm_node = &node->mm_nodes[0]; + mem->start = node->mm_nodes[0].start; } return ret; @@ -98,15 +101,19 @@ static void ttm_range_man_free(struct ttm_resource_manager *man, struct ttm_resource *mem) { struct ttm_range_manager *rman = to_range_manager(man); + struct ttm_range_mgr_node *node; - if (mem->mm_node) { - spin_lock(&rman->lock); - drm_mm_remove_node(mem->mm_node); - spin_unlock(&rman->lock); + if (!mem->mm_node) + return; - kfree(mem->mm_node); - mem->mm_node = NULL; - } + node = to_ttm_range_mgr_node(mem); + + spin_lock(&rman->lock); + drm_mm_remove_node(&node->mm_nodes[0]); + spin_unlock(&rman->lock); + + kfree(node); + mem->mm_node = NULL; } static void ttm_range_man_debug(struct ttm_resource_manager *man, @@ -125,6 +132,17 @@ static const struct ttm_resource_manager_func ttm_range_manager_func = { .debug = ttm_range_man_debug }; +/** + * ttm_range_man_init + * + * @bdev: ttm device + * @type: memory manager type + * @use_tt: if the memory manager uses tt + * @p_size: size of area to be managed in pages. + * + * Initialise a generic range manager for the selected memory type. + * The range manager is installed for this device in the type slot. + */ int ttm_range_man_init(struct ttm_device *bdev, unsigned type, bool use_tt, unsigned long p_size) @@ -152,6 +170,14 @@ int ttm_range_man_init(struct ttm_device *bdev, } EXPORT_SYMBOL(ttm_range_man_init); +/** + * ttm_range_man_fini + * + * @bdev: ttm device + * @type: memory manager type + * + * Remove the generic range manager from a slot and tear it down. 
+ */ int ttm_range_man_fini(struct ttm_device *bdev, unsigned type) { diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 65451e1bc303..2a51ace17614 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -25,6 +25,22 @@ #include #include +void ttm_resource_init(struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource *res) +{ + res->mm_node = NULL; + res->start = 0; + res->num_pages = PFN_UP(bo->base.size); + res->mem_type = place->mem_type; + res->placement = place->flags; + res->bus.addr = NULL; + res->bus.offset = 0; + res->bus.is_iomem = false; + res->bus.caching = ttm_cached; +} +EXPORT_SYMBOL(ttm_resource_init); + int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_resource **res_ptr) @@ -38,15 +54,7 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, if (!res) return -ENOMEM; - res->mm_node = NULL; - res->start = 0; - res->num_pages = PFN_UP(bo->base.size); - res->mem_type = place->mem_type; - res->placement = place->flags; - res->bus.addr = NULL; - res->bus.offset = 0; - res->bus.is_iomem = false; - res->bus.caching = ttm_cached; + ttm_resource_init(bo, place, res); r = man->func->alloc(man, bo, place, res); if (r) { kfree(res); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index ead0ef7136c8..b266971c1974 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -304,30 +304,4 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -/** - * ttm_range_man_init - * - * @bdev: ttm device - * @type: memory manager type - * @use_tt: if the memory manager uses tt - * @p_size: size of area to be managed in pages. - * - * Initialise a generic range manager for the selected memory type. - * The range manager is installed for this device in the type slot. - */ -int ttm_range_man_init(struct ttm_device *bdev, - unsigned type, bool use_tt, - unsigned long p_size); - -/** - * ttm_range_man_fini - * - * @bdev: ttm device - * @type: memory manager type - * - * Remove the generic range manager from a slot and tear it down. - */ -int ttm_range_man_fini(struct ttm_device *bdev, - unsigned type); - #endif diff --git a/include/drm/ttm/ttm_range_manager.h b/include/drm/ttm/ttm_range_manager.h new file mode 100644 index 000000000000..983f452ce54b --- /dev/null +++ b/include/drm/ttm/ttm_range_manager.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#ifndef _TTM_RANGE_MANAGER_H_ +#define _TTM_RANGE_MANAGER_H_ + +#include +#include + +/** + * struct ttm_range_mgr_node + * + * @base: base clase we extend + * @mm_nodes: MM nodes, usually 1 + * + * Extending the ttm_resource object to manage an address space allocation with + * one or more drm_mm_nodes. + */ +struct ttm_range_mgr_node { + struct ttm_resource base; + struct drm_mm_node mm_nodes[]; +}; + +/** + * to_ttm_range_mgr_node + * + * @res: the resource to upcast + * + * Upcast the ttm_resource object into a ttm_range_mgr_node object. 
+ */ +static inline struct ttm_range_mgr_node * +to_ttm_range_mgr_node(struct ttm_resource *res) +{ + return container_of(res->mm_node, struct ttm_range_mgr_node, + mm_nodes[0]); +} + +int ttm_range_man_init(struct ttm_device *bdev, + unsigned type, bool use_tt, + unsigned long p_size); +int ttm_range_man_fini(struct ttm_device *bdev, + unsigned type); + +#endif diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index c17c1a52070d..803e4875d779 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -223,6 +223,9 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man) man->move = NULL; } +void ttm_resource_init(struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource *res); int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_resource **res); -- cgit v1.2.3 From cb1c81467af355829a4a9d8fa3f92ffab355d93c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 30 Apr 2021 09:48:27 +0200 Subject: drm/ttm: flip the switch for driver allocated resources v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of both driver and TTM allocating memory finalize embedding the ttm_resource object as base into the driver backends. v2: fix typo in vmwgfx grid mgr and double init in amdgpu_vram_mgr.c Signed-off-by: Christian König Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210602100914.46246-10-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 44 ++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 60 ++++++++++++-------------- drivers/gpu/drm/drm_gem_vram_helper.c | 3 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 8 +--- drivers/gpu/drm/nouveau/nouveau_mem.c | 11 +++-- drivers/gpu/drm/nouveau/nouveau_mem.h | 14 +++--- drivers/gpu/drm/nouveau/nouveau_ttm.c | 32 +++++++------- drivers/gpu/drm/ttm/ttm_range_manager.c | 23 ++++------ drivers/gpu/drm/ttm/ttm_resource.c | 18 +------- drivers/gpu/drm/ttm/ttm_sys_manager.c | 12 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 24 +++++------ drivers/gpu/drm/vmwgfx/vmwgfx_thp.c | 27 ++++++------ include/drm/ttm/ttm_range_manager.h | 3 +- include/drm/ttm/ttm_resource.h | 43 ++++++++---------- 16 files changed, 140 insertions(+), 189 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 29113f72bc39..194f9eecf89c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -40,8 +40,7 @@ to_gtt_mgr(struct ttm_resource_manager *man) static inline struct amdgpu_gtt_node * to_amdgpu_gtt_node(struct ttm_resource *res) { - return container_of(res->mm_node, struct amdgpu_gtt_node, - base.mm_nodes[0]); + return container_of(res, struct amdgpu_gtt_node, base.base); } /** @@ -102,13 +101,13 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = { /** * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space * - * @mem: the mem object to check + * @res: the mem object to check * * Check if a mem object has already address space allocated. 
*/ -bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem) +bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res) { - struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(mem); + struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); return drm_mm_node_allocated(&node->base.mm_nodes[0]); } @@ -126,19 +125,20 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem) static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, struct ttm_buffer_object *tbo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); + uint32_t num_pages = PFN_UP(tbo->base.size); struct amdgpu_gtt_node *node; int r; spin_lock(&mgr->lock); - if ((tbo->resource == mem || tbo->resource->mem_type != TTM_PL_TT) && - atomic64_read(&mgr->available) < mem->num_pages) { + if (tbo->resource && tbo->resource->mem_type != TTM_PL_TT && + atomic64_read(&mgr->available) < num_pages) { spin_unlock(&mgr->lock); return -ENOSPC; } - atomic64_sub(mem->num_pages, &mgr->available); + atomic64_sub(num_pages, &mgr->available); spin_unlock(&mgr->lock); node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); @@ -154,29 +154,28 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, spin_lock(&mgr->lock); r = drm_mm_insert_node_in_range(&mgr->mm, &node->base.mm_nodes[0], - mem->num_pages, - tbo->page_alignment, 0, - place->fpfn, place->lpfn, + num_pages, tbo->page_alignment, + 0, place->fpfn, place->lpfn, DRM_MM_INSERT_BEST); spin_unlock(&mgr->lock); if (unlikely(r)) goto err_free; - mem->start = node->base.mm_nodes[0].start; + node->base.base.start = node->base.mm_nodes[0].start; } else { node->base.mm_nodes[0].start = 0; - node->base.mm_nodes[0].size = mem->num_pages; - mem->start = AMDGPU_BO_INVALID_OFFSET; + node->base.mm_nodes[0].size = node->base.base.num_pages; + node->base.base.start = AMDGPU_BO_INVALID_OFFSET; } - mem->mm_node = &node->base.mm_nodes[0]; + *res = &node->base.base; return 0; err_free: kfree(node); err_out: - atomic64_add(mem->num_pages, &mgr->available); + atomic64_add(num_pages, &mgr->available); return r; } @@ -190,21 +189,16 @@ err_out: * Free the allocated GTT again. 
*/ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { + struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - struct amdgpu_gtt_node *node; - - if (!mem->mm_node) - return; - - node = to_amdgpu_gtt_node(mem); spin_lock(&mgr->lock); if (drm_mm_node_allocated(&node->base.mm_nodes[0])) drm_mm_remove_node(&node->base.mm_nodes[0]); spin_unlock(&mgr->lock); - atomic64_add(mem->num_pages, &mgr->available); + atomic64_add(res->num_pages, &mgr->available); kfree(node); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 59723c3d5826..19c1384a133f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1296,7 +1296,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (bo->base.resv == &bo->base._resv) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); - if (bo->resource->mem_type != TTM_PL_VRAM || !bo->resource->mm_node || + if (bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 40f2adf305bc..59e0fefb15aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -28,6 +28,7 @@ #include #include +#include /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { @@ -53,7 +54,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, { struct drm_mm_node *node; - if (!res || !res->mm_node) { + if (!res) { cur->start = start; cur->size = size; cur->remaining = size; @@ -63,7 +64,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - node = res->mm_node; + node = to_ttm_range_mgr_node(res)->mm_nodes; while (start >= node->size << PAGE_SHIFT) start -= node++->size << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 5ebfaed37e47..9a6df02477ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -219,19 +219,20 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_resource *mem = bo->tbo.resource; - struct drm_mm_node *nodes = mem->mm_node; - unsigned pages = mem->num_pages; + struct ttm_resource *res = bo->tbo.resource; + unsigned pages = res->num_pages; + struct drm_mm_node *mm; u64 usage; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); - if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) + if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - for (usage = 0; nodes && pages; pages -= nodes->size, nodes++) - usage += amdgpu_vram_mgr_vis_size(adev, nodes); + mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; + for (usage = 0; pages; pages -= mm->size, mm++) + usage += amdgpu_vram_mgr_vis_size(adev, mm); return usage; } @@ -367,7 +368,7 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, struct ttm_buffer_object *tbo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { 
unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); @@ -388,7 +389,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, max_bytes -= AMDGPU_VM_RESERVED_VRAM; /* bail out quickly if there's likely not enough VRAM for this BO */ - mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; + mem_bytes = tbo->base.size; if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { r = -ENOSPC; goto error_sub; @@ -406,7 +407,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, #endif pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment); - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); + num_nodes = DIV_ROUND_UP(PFN_UP(mem_bytes), pages_per_node); } node = kvmalloc(struct_size(node, mm_nodes, num_nodes), @@ -422,8 +423,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) mode = DRM_MM_INSERT_HIGH; - mem->start = 0; - pages_left = mem->num_pages; + pages_left = node->base.num_pages; /* Limit maximum size to 2GB due to SG table limitations */ pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); @@ -451,7 +451,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); - amdgpu_vram_mgr_virt_start(mem, &node->mm_nodes[i]); + amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); pages_left -= pages; ++i; @@ -461,10 +461,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, spin_unlock(&mgr->lock); if (i == 1) - mem->placement |= TTM_PL_FLAG_CONTIGUOUS; + node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; atomic64_add(vis_usage, &mgr->vis_usage); - mem->mm_node = &node->mm_nodes[0]; + *res = &node->base; return 0; error_free: @@ -487,28 +487,22 @@ error_sub: * Free the allocated VRAM again. */ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct ttm_range_mgr_node *node; uint64_t usage = 0, vis_usage = 0; - unsigned pages = mem->num_pages; - struct drm_mm_node *nodes; - - if (!mem->mm_node) - return; - - node = to_ttm_range_mgr_node(mem); - nodes = &node->mm_nodes[0]; + unsigned i, pages; spin_lock(&mgr->lock); - while (pages) { - pages -= nodes->size; - drm_mm_remove_node(nodes); - usage += nodes->size << PAGE_SHIFT; - vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); - ++nodes; + for (i = 0, pages = res->num_pages; pages; + pages -= node->mm_nodes[i].size, ++i) { + struct drm_mm_node *mm = &node->mm_nodes[i]; + + drm_mm_remove_node(mm); + usage += mm->size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); } amdgpu_vram_mgr_do_reserve(man); spin_unlock(&mgr->lock); @@ -533,7 +527,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, * Allocate and fill a sg table from a VRAM allocation. 
*/ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, - struct ttm_resource *mem, + struct ttm_resource *res, u64 offset, u64 length, struct device *dev, enum dma_data_direction dir, @@ -549,7 +543,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, return -ENOMEM; /* Determine the number of DRM_MM nodes to export */ - amdgpu_res_first(mem, offset, length, &cursor); + amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; amdgpu_res_next(&cursor, cursor.size); @@ -569,7 +563,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, * and the number of bytes from it. Access the following * DRM_MM node(s) if more buffer needs to exported */ - amdgpu_res_first(mem, offset, length, &cursor); + amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { phys_addr_t phys = cursor.start + adev->gmc.aper_base; size_t size = cursor.size; diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 17a4c5d47b6a..2a1229b8364e 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -250,7 +250,8 @@ EXPORT_SYMBOL(drm_gem_vram_put); static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo) { /* Keep TTM behavior for now, remove when drivers are audited */ - if (WARN_ON_ONCE(!gbo->bo.resource->mm_node)) + if (WARN_ON_ONCE(!gbo->bo.resource || + gbo->bo.resource->mem_type == TTM_PL_SYSTEM)) return 0; return gbo->bo.resource->start; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 3a0d9b3bf991..c3d20bc80022 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -918,12 +918,8 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, } } - if (new_reg) { - if (new_reg->mm_node) - nvbo->offset = (new_reg->start << PAGE_SHIFT); - else - nvbo->offset = 0; - } + if (new_reg) + nvbo->offset = (new_reg->start << PAGE_SHIFT); } diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index a1049e9feee1..0de6549fb875 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -178,25 +178,24 @@ void nouveau_mem_del(struct ttm_resource *reg) { struct nouveau_mem *mem = nouveau_mem(reg); - if (!mem) - return; + nouveau_mem_fini(mem); - kfree(reg->mm_node); - reg->mm_node = NULL; + kfree(mem); } int nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp, - struct ttm_resource *reg) + struct ttm_resource **res) { struct nouveau_mem *mem; if (!(mem = kzalloc(sizeof(*mem), GFP_KERNEL))) return -ENOMEM; + mem->cli = cli; mem->kind = kind; mem->comp = comp; - reg->mm_node = mem; + *res = &mem->base; return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h b/drivers/gpu/drm/nouveau/nouveau_mem.h index 3a6a1be2ed52..2c01166a90f2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.h +++ b/drivers/gpu/drm/nouveau/nouveau_mem.h @@ -6,12 +6,6 @@ struct ttm_tt; #include #include -static inline struct nouveau_mem * -nouveau_mem(struct ttm_resource *reg) -{ - return reg->mm_node; -} - struct nouveau_mem { struct ttm_resource base; struct nouveau_cli *cli; @@ -21,8 +15,14 @@ struct nouveau_mem { struct nvif_vma vma[2]; }; +static inline struct nouveau_mem * +nouveau_mem(struct ttm_resource *reg) +{ + return container_of(reg, struct nouveau_mem, base); +} + int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp, - struct ttm_resource *); + struct ttm_resource **); void nouveau_mem_del(struct 
ttm_resource *); int nouveau_mem_vram(struct ttm_resource *, bool contig, u8 page); int nouveau_mem_host(struct ttm_resource *, struct ttm_tt *); diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 1ac2417effc0..f4c2e46b6fe1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -45,7 +45,7 @@ static int nouveau_vram_manager_new(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *reg) + struct ttm_resource **res) { struct nouveau_bo *nvbo = nouveau_bo(bo); struct nouveau_drm *drm = nouveau_bdev(bo->bdev); @@ -54,15 +54,15 @@ nouveau_vram_manager_new(struct ttm_resource_manager *man, if (drm->client.device.info.ram_size == 0) return -ENOMEM; - ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, res); if (ret) return ret; - ttm_resource_init(bo, place, reg->mm_node); + ttm_resource_init(bo, place, *res); - ret = nouveau_mem_vram(reg, nvbo->contig, nvbo->page); + ret = nouveau_mem_vram(*res, nvbo->contig, nvbo->page); if (ret) { - nouveau_mem_del(reg); + nouveau_mem_del(*res); return ret; } @@ -78,18 +78,18 @@ static int nouveau_gart_manager_new(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *reg) + struct ttm_resource **res) { struct nouveau_bo *nvbo = nouveau_bo(bo); struct nouveau_drm *drm = nouveau_bdev(bo->bdev); int ret; - ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, res); if (ret) return ret; - ttm_resource_init(bo, place, reg->mm_node); - reg->start = 0; + ttm_resource_init(bo, place, *res); + (*res)->start = 0; return 0; } @@ -102,27 +102,27 @@ static int nv04_gart_manager_new(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *reg) + struct ttm_resource **res) { struct nouveau_bo *nvbo = nouveau_bo(bo); struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; - ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); - mem = nouveau_mem(reg); + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, res); if (ret) return ret; - ttm_resource_init(bo, place, reg->mm_node); + mem = nouveau_mem(*res); + ttm_resource_init(bo, place, *res); ret = nvif_vmm_get(&mem->cli->vmm.vmm, PTES, false, 12, 0, - (long)reg->num_pages << PAGE_SHIFT, &mem->vma[0]); + (long)(*res)->num_pages << PAGE_SHIFT, &mem->vma[0]); if (ret) { - nouveau_mem_del(reg); + nouveau_mem_del(*res); return ret; } - reg->start = mem->vma[0].addr >> PAGE_SHIFT; + (*res)->start = mem->vma[0].addr >> PAGE_SHIFT; return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index ce5d07ca384c..c32e1aee2481 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -58,7 +58,7 @@ to_range_manager(struct ttm_resource_manager *man) static int ttm_range_man_alloc(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { struct ttm_range_manager *rman = to_range_manager(man); struct ttm_range_mgr_node *node; @@ -83,37 +83,30 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, spin_lock(&rman->lock); ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0], - 
mem->num_pages, bo->page_alignment, 0, + node->base.num_pages, + bo->page_alignment, 0, place->fpfn, lpfn, mode); spin_unlock(&rman->lock); - if (unlikely(ret)) { + if (unlikely(ret)) kfree(node); - } else { - mem->mm_node = &node->mm_nodes[0]; - mem->start = node->mm_nodes[0].start; - } + else + node->base.start = node->mm_nodes[0].start; return ret; } static void ttm_range_man_free(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct ttm_range_manager *rman = to_range_manager(man); - struct ttm_range_mgr_node *node; - - if (!mem->mm_node) - return; - - node = to_ttm_range_mgr_node(mem); spin_lock(&rman->lock); drm_mm_remove_node(&node->mm_nodes[0]); spin_unlock(&rman->lock); kfree(node); - mem->mm_node = NULL; } static void ttm_range_man_debug(struct ttm_resource_manager *man, diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 2a51ace17614..2a68145572cc 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -29,7 +29,6 @@ void ttm_resource_init(struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_resource *res) { - res->mm_node = NULL; res->start = 0; res->num_pages = PFN_UP(bo->base.size); res->mem_type = place->mem_type; @@ -47,22 +46,8 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, { struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, place->mem_type); - struct ttm_resource *res; - int r; - - res = kmalloc(sizeof(*res), GFP_KERNEL); - if (!res) - return -ENOMEM; - - ttm_resource_init(bo, place, res); - r = man->func->alloc(man, bo, place, res); - if (r) { - kfree(res); - return r; - } - *res_ptr = res; - return 0; + return man->func->alloc(man, bo, place, res_ptr); } void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) @@ -74,7 +59,6 @@ void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) man = ttm_manager_type(bo->bdev, (*res)->mem_type); man->func->free(man, *res); - kfree(*res); *res = NULL; } EXPORT_SYMBOL(ttm_resource_free); diff --git a/drivers/gpu/drm/ttm/ttm_sys_manager.c b/drivers/gpu/drm/ttm/ttm_sys_manager.c index 2b75f493c3c9..63aca52f75e1 100644 --- a/drivers/gpu/drm/ttm/ttm_sys_manager.c +++ b/drivers/gpu/drm/ttm/ttm_sys_manager.c @@ -10,20 +10,20 @@ static int ttm_sys_man_alloc(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { - mem->mm_node = kzalloc(sizeof(*mem), GFP_KERNEL); - if (!mem->mm_node) + *res = kzalloc(sizeof(**res), GFP_KERNEL); + if (!*res) return -ENOMEM; - ttm_resource_init(bo, place, mem->mm_node); + ttm_resource_init(bo, place, *res); return 0; } static void ttm_sys_man_free(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { - kfree(mem->mm_node); + kfree(res); } static const struct ttm_resource_manager_func ttm_sys_manager_func = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 82a5e6489810..28ceb749a733 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -52,16 +52,16 @@ static struct vmwgfx_gmrid_man *to_gmrid_manager(struct ttm_resource_manager *ma static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *mem) + 
struct ttm_resource **res) { struct vmwgfx_gmrid_man *gman = to_gmrid_manager(man); int id; - mem->mm_node = kmalloc(sizeof(*mem), GFP_KERNEL); - if (!mem->mm_node) + *res = kmalloc(sizeof(**res), GFP_KERNEL); + if (!*res) return -ENOMEM; - ttm_resource_init(bo, place, mem->mm_node); + ttm_resource_init(bo, place, *res); id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); if (id < 0) @@ -70,34 +70,34 @@ static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man, spin_lock(&gman->lock); if (gman->max_gmr_pages > 0) { - gman->used_gmr_pages += mem->num_pages; + gman->used_gmr_pages += (*res)->num_pages; if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages)) goto nospace; } - mem->mm_node = gman; - mem->start = id; + (*res)->start = id; spin_unlock(&gman->lock); return 0; nospace: - gman->used_gmr_pages -= mem->num_pages; + gman->used_gmr_pages -= (*res)->num_pages; spin_unlock(&gman->lock); ida_free(&gman->gmr_ida, id); + kfree(*res); return -ENOSPC; } static void vmw_gmrid_man_put_node(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { struct vmwgfx_gmrid_man *gman = to_gmrid_manager(man); - ida_free(&gman->gmr_ida, mem->start); + ida_free(&gman->gmr_ida, res->start); spin_lock(&gman->lock); - gman->used_gmr_pages -= mem->num_pages; + gman->used_gmr_pages -= res->num_pages; spin_unlock(&gman->lock); - kfree(mem->mm_node); + kfree(res); } static const struct ttm_resource_manager_func vmw_gmrid_manager_func; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c index 8765835696ac..2a3d3468e4e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c @@ -51,7 +51,7 @@ static int vmw_thp_insert_aligned(struct ttm_buffer_object *bo, static int vmw_thp_get_node(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { struct vmw_thp_manager *rman = to_thp_manager(man); struct drm_mm *mm = &rman->mm; @@ -78,26 +78,27 @@ static int vmw_thp_get_node(struct ttm_resource_manager *man, spin_lock(&rman->lock); if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) { align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT); - if (mem->num_pages >= align_pages) { + if (node->base.num_pages >= align_pages) { ret = vmw_thp_insert_aligned(bo, mm, &node->mm_nodes[0], - align_pages, place, mem, - lpfn, mode); + align_pages, place, + &node->base, lpfn, mode); if (!ret) goto found_unlock; } } align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT); - if (mem->num_pages >= align_pages) { + if (node->base.num_pages >= align_pages) { ret = vmw_thp_insert_aligned(bo, mm, &node->mm_nodes[0], - align_pages, place, mem, lpfn, - mode); + align_pages, place, &node->base, + lpfn, mode); if (!ret) goto found_unlock; } ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0], - mem->num_pages, bo->page_alignment, 0, + node->base.num_pages, + bo->page_alignment, 0, place->fpfn, lpfn, mode); found_unlock: spin_unlock(&rman->lock); @@ -105,20 +106,18 @@ found_unlock: if (unlikely(ret)) { kfree(node); } else { - mem->mm_node = &node->mm_nodes[0]; - mem->start = node->mm_nodes[0].start; + node->base.start = node->mm_nodes[0].start; + *res = &node->base; } return ret; } - - static void vmw_thp_put_node(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct vmw_thp_manager *rman = to_thp_manager(man); - struct 
ttm_range_mgr_node * node = mem->mm_node; spin_lock(&rman->lock); drm_mm_remove_node(&node->mm_nodes[0]); diff --git a/include/drm/ttm/ttm_range_manager.h b/include/drm/ttm/ttm_range_manager.h index 983f452ce54b..22b6fa42ac20 100644 --- a/include/drm/ttm/ttm_range_manager.h +++ b/include/drm/ttm/ttm_range_manager.h @@ -30,8 +30,7 @@ struct ttm_range_mgr_node { static inline struct ttm_range_mgr_node * to_ttm_range_mgr_node(struct ttm_resource *res) { - return container_of(res->mm_node, struct ttm_range_mgr_node, - mm_nodes[0]); + return container_of(res, struct ttm_range_mgr_node, base); } int ttm_range_man_init(struct ttm_device *bdev, diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 803e4875d779..4abb95b9fd11 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -45,46 +45,38 @@ struct ttm_resource_manager_func { * * @man: Pointer to a memory type manager. * @bo: Pointer to the buffer object we're allocating space for. - * @placement: Placement details. - * @flags: Additional placement flags. - * @mem: Pointer to a struct ttm_resource to be filled in. + * @place: Placement details. + * @res: Resulting pointer to the ttm_resource. * * This function should allocate space in the memory type managed - * by @man. Placement details if - * applicable are given by @placement. If successful, - * @mem::mm_node should be set to a non-null value, and - * @mem::start should be set to a value identifying the beginning + * by @man. Placement details if applicable are given by @place. If + * successful, a filled in ttm_resource object should be returned in + * @res. @res::start should be set to a value identifying the beginning * of the range allocated, and the function should return zero. - * If the memory region accommodate the buffer object, @mem::mm_node - * should be set to NULL, and the function should return 0. + * If the manager can't fulfill the request -ENOSPC should be returned. * If a system error occurred, preventing the request to be fulfilled, * the function should return a negative error code. * - * Note that @mem::mm_node will only be dereferenced by - * struct ttm_resource_manager functions and optionally by the driver, - * which has knowledge of the underlying type. - * - * This function may not be called from within atomic context, so - * an implementation can and must use either a mutex or a spinlock to - * protect any data structures managing the space. + * This function may not be called from within atomic context and needs + * to take care of its own locking to protect any data structures + * managing the space. */ int (*alloc)(struct ttm_resource_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource *mem); + struct ttm_resource **res); /** * struct ttm_resource_manager_func member free * * @man: Pointer to a memory type manager. - * @mem: Pointer to a struct ttm_resource to be filled in. + * @res: Pointer to a struct ttm_resource to be freed. * - * This function frees memory type resources previously allocated - * and that are identified by @mem::mm_node and @mem::start. May not - * be called from within atomic context. + * This function frees memory type resources previously allocated. + * May not be called from within atomic context. 
*/ void (*free)(struct ttm_resource_manager *man, - struct ttm_resource *mem); + struct ttm_resource *res); /** * struct ttm_resource_manager_func member debug @@ -158,9 +150,9 @@ struct ttm_bus_placement { /** * struct ttm_resource * - * @mm_node: Memory manager node. - * @size: Requested size of memory region. - * @num_pages: Actual size of memory region in pages. + * @start: Start of the allocation. + * @num_pages: Actual size of resource in pages. + * @mem_type: Resource type of the allocation. * @placement: Placement flags. * @bus: Placement on io bus accessible to the CPU * @@ -168,7 +160,6 @@ struct ttm_bus_placement { * buffer object. */ struct ttm_resource { - void *mm_node; unsigned long start; unsigned long num_pages; uint32_t mem_type; -- cgit v1.2.3 From 3e2926f8753dac1ded56c8ef3e91f56ee763dafd Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 4 Jun 2021 09:00:11 -0400 Subject: drm/panfrost: Add AFBC_FEATURES parameter The value of the AFBC_FEATURES register is required by userspace to determine AFBC support on Bifrost. A user on our IRC channel (#panfrost) reported a workload that raised a fault on one system's Mali G31 but worked flawlessly with another system's Mali G31. We determined the cause to be missing AFBC support on one vendor's Mali implementation -- it turns out AFBC is optional on Bifrost! Whether AFBC is supported or not is exposed in the AFBC_FEATURES register on Bifrost, which reads back as 0 on Midgard. A zero value indicates AFBC is fully supported, provided the architecture itself supports AFBC, allowing backwards-compatibility with Midgard. Bits 0 and 15 indicate that AFBC support is absent for texturing and rendering respectively. The user experiencing the fault reports that AFBC_FEATURES reads back 0x10001 on their system, confirming the architectural lack of AFBC. Userspace needs this parameter to know to disable AFBC on that chip, and perhaps others. v2: Fix typo from copy-paste fail. v3: Bump the UABI version. This commit was cherry-picked from another series so chalking this up to a rebase fail. 
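As a rough userspace sketch (not part of this patch; the helper name and the fallback policy below are illustrative assumptions), a driver could query the new parameter through DRM_IOCTL_PANFROST_GET_PARAM and simply avoid AFBC whenever the register reports missing support:

  #include <stdbool.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include "panfrost_drm.h"   /* uapi header; exact include path is an assumption */

  /* Returns true only when the GPU reports full AFBC support (value == 0). */
  static bool panfrost_has_afbc(int drm_fd)
  {
          struct drm_panfrost_get_param req;

          memset(&req, 0, sizeof(req));
          req.param = DRM_PANFROST_PARAM_AFBC_FEATURES;

          /* Kernels without this patch reject the unknown param; treating
           * that as "no AFBC" is an illustrative policy, not mandated here. */
          if (ioctl(drm_fd, DRM_IOCTL_PANFROST_GET_PARAM, &req) != 0)
                  return false;

          return req.value == 0;
  }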
Signed-off-by: Alyssa Rosenzweig Reviewed-by: Steven Price Cc: Rob Herring Cc: Tomeu Vizoso Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20210604130011.3203-1-alyssa.rosenzweig@collabora.com --- drivers/gpu/drm/panfrost/panfrost_device.h | 1 + drivers/gpu/drm/panfrost/panfrost_drv.c | 4 +++- drivers/gpu/drm/panfrost/panfrost_gpu.c | 1 + drivers/gpu/drm/panfrost/panfrost_regs.h | 1 + include/uapi/drm/panfrost_drm.h | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index 597cf1459b0a..f614e98771e4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -45,6 +45,7 @@ struct panfrost_features { u32 thread_max_workgroup_sz; u32 thread_max_barrier_sz; u32 coherency_features; + u32 afbc_features; u32 texture_features[4]; u32 js_features[16]; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index ca07098a6141..1596559f3d14 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -63,6 +63,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct PANFROST_FEATURE(THREAD_MAX_BARRIER_SZ, thread_max_barrier_sz); PANFROST_FEATURE(COHERENCY_FEATURES, coherency_features); + PANFROST_FEATURE(AFBC_FEATURES, afbc_features); PANFROST_FEATURE_ARRAY(TEXTURE_FEATURES, texture_features, 3); PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15); PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups); @@ -547,6 +548,7 @@ DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); * Panfrost driver version: * - 1.0 - initial interface * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO + * - 1.2 - adds AFBC_FEATURES query */ static const struct drm_driver panfrost_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ, @@ -559,7 +561,7 @@ static const struct drm_driver panfrost_drm_driver = { .desc = "panfrost DRM", .date = "20180908", .major = 1, - .minor = 1, + .minor = 2, .gem_create_object = panfrost_gem_create_object, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 2aae636f1cf5..0e70e27fd8c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -228,6 +228,7 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index eddaa62ad8b0..dc9df5457f1c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -82,6 +82,7 @@ #define GPU_TEXTURE_FEATURES(n) (0x0B0 + ((n) * 4)) #define GPU_JS_FEATURES(n) (0x0C0 + ((n) * 4)) +#define GPU_AFBC_FEATURES (0x4C) /* (RO) AFBC support on Bifrost */ #define GPU_SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ #define GPU_SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ diff 
--git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index ec19db1eead8..061e700dd06c 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -171,6 +171,7 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_JS_FEATURES15, DRM_PANFROST_PARAM_NR_CORE_GROUPS, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, + DRM_PANFROST_PARAM_AFBC_FEATURES, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 6edbd6abb783d54f6ac4c3ed5cd9e50cff6c15e9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 10 May 2021 16:14:09 +0200 Subject: dma-buf: rename and cleanup dma_resv_get_excl v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the comment needs to state explicitly that this doesn't get a reference to the object then the function is named rather badly. Rename the function and use rcu_dereference_check(), this way it can be used from both rcu as well as lock protected critical sections. v2: improve kerneldoc as suggested by Daniel v3: use dma_resv_excl_fence as function name Signed-off-by: Christian König Acked-by: Daniel Vetter Reviewed-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210602111714.212426-4-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 5 ++--- drivers/dma-buf/dma-resv.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 3 +-- drivers/gpu/drm/msm/msm_gem.c | 4 ++-- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- drivers/gpu/drm/radeon/radeon_display.c | 2 +- drivers/gpu/drm/radeon/radeon_sync.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 2 +- include/linux/dma-resv.h | 14 ++++++-------- 15 files changed, 26 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index ee04fb442015..d419cf90ee73 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -234,7 +234,7 @@ retry: shared_count = fobj->shared_count; else shared_count = 0; - fence_excl = rcu_dereference(resv->fence_excl); + fence_excl = dma_resv_excl_fence(resv); if (read_seqcount_retry(&resv->seq, seq)) { rcu_read_unlock(); goto retry; @@ -1382,8 +1382,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) buf_obj->name ?: ""); robj = buf_obj->resv; - fence = rcu_dereference_protected(robj->fence_excl, - dma_resv_held(robj)); + fence = dma_resv_excl_fence(robj); if (fence) seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n", fence->ops->get_driver_name(fence), diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 6132ba631991..ed7b4e8f002f 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence); */ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) { - struct dma_fence *old_fence = dma_resv_get_excl(obj); + struct dma_fence *old_fence = dma_resv_excl_fence(obj); struct dma_resv_list *old; u32 i = 0; @@ -380,7 +380,7 @@ retry: rcu_read_unlock(); src_list = dma_resv_get_list(dst); - old = dma_resv_get_excl(dst); + old = dma_resv_excl_fence(dst); write_seqcount_begin(&dst->seq); /* write_seqcount_begin provides the necessary memory barrier */ @@ -428,7 +428,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, 
rcu_read_lock(); seq = read_seqcount_begin(&obj->seq); - fence_excl = rcu_dereference(obj->fence_excl); + fence_excl = dma_resv_excl_fence(obj); if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock; @@ -523,7 +523,7 @@ retry: rcu_read_lock(); i = -1; - fence = rcu_dereference(obj->fence_excl); + fence = dma_resv_excl_fence(obj); if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (!dma_fence_get_rcu(fence)) goto unlock_retry; @@ -645,7 +645,7 @@ retry: } if (!shared_count) { - struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl); + struct dma_fence *fence_excl = dma_resv_excl_fence(obj); if (fence_excl) { ret = dma_resv_test_signaled_single(fence_excl); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 73c76a3e2b12..7d5aaf584634 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -226,7 +226,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, if (!amdgpu_vm_ready(vm)) goto out_unlock; - fence = dma_resv_get_excl(bo->tbo.base.resv); + fence = dma_resv_excl_fence(bo->tbo.base.resv); if (fence) { amdgpu_bo_fence(bo, fence, true); fence = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 4e558632a5d2..2bdc9df5c6b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -210,7 +210,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return -EINVAL; /* always sync to the exclusive fence */ - f = dma_resv_get_excl(resv); + f = dma_resv_excl_fence(resv); r = amdgpu_sync_fence(sync, f); flist = dma_resv_get_list(resv); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index db69f19ab5bc..2237fe5204d0 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -471,7 +471,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) } } - fence = rcu_dereference(robj->fence_excl); + fence = dma_resv_excl_fence(robj); if (fence) etnaviv_gem_describe_fence(fence, "Exclusive", m); rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 25235ef630c1..088d375b3395 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -113,8 +113,7 @@ retry: seq = raw_read_seqcount(&obj->base.resv->seq); /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = - busy_check_writer(rcu_dereference(obj->base.resv->fence_excl)); + args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); /* Translate shared fences to READ set of engines */ list = rcu_dereference(obj->base.resv->fence); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 56df86e5f740..a5a2a922e3e8 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -819,7 +819,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj, fobj = dma_resv_get_list(obj->resv); if (!fobj || (fobj->shared_count == 0)) { - fence = dma_resv_get_excl(obj->resv); + fence = dma_resv_excl_fence(obj->resv); /* don't need to wait on our own fences, since ring is fifo */ if (fence && (fence->context != fctx->context)) { ret = dma_fence_wait(fence, true); @@ -1035,7 +1035,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, } } - fence = rcu_dereference(robj->fence_excl); + 
fence = dma_resv_excl_fence(robj); if (fence) describe_fence(fence, "Exclusive", m); rcu_read_unlock(); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index c3d20bc80022..520b1ea9d16c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -951,7 +951,7 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct drm_device *dev = drm->dev; - struct dma_fence *fence = dma_resv_get_excl(bo->base.resv); + struct dma_fence *fence = dma_resv_excl_fence(bo->base.resv); nv10_bo_put_tile_region(dev, *old_tile, fence); *old_tile = new_tile; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index e5dcbf67de7e..19c096de5bdc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -356,7 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e } fobj = dma_resv_get_list(resv); - fence = dma_resv_get_excl(resv); + fence = dma_resv_excl_fence(resv); if (fence && (!exclusive || !fobj || !fobj->shared_count)) { struct nouveau_channel *prev = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 652af7a134bd..406681317419 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -533,7 +533,7 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - work->fence = dma_fence_get(dma_resv_get_excl(new_rbo->tbo.base.resv)); + work->fence = dma_fence_get(dma_resv_excl_fence(new_rbo->tbo.base.resv)); radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); radeon_bo_unreserve(new_rbo); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 5d3302945076..c8a1711325de 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -98,7 +98,7 @@ int radeon_sync_resv(struct radeon_device *rdev, int r = 0; /* always sync to the exclusive fence */ - f = dma_resv_get_excl(resv); + f = dma_resv_excl_fence(resv); fence = f ? 
to_radeon_fence(f) : NULL; if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index dfa9fdbe98da..1f5b1a5c0a09 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -477,7 +477,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return -EINVAL; } - f = dma_resv_get_excl(bo->tbo.base.resv); + f = dma_resv_excl_fence(bo->tbo.base.resv); if (f) { r = radeon_fence_wait((struct radeon_fence *)f, false); if (r) { diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4ed56520b81d..1752f8e523e7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -262,7 +262,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) rcu_read_lock(); fobj = rcu_dereference(resv->fence); - fence = rcu_dereference(resv->fence_excl); + fence = dma_resv_excl_fence(resv); if (fence && !fence->ops->signaled) dma_fence_enable_sw_signaling(fence); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 62ea920addc3..7b45393ad98e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1166,7 +1166,7 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, if (bo->moving) dma_fence_put(bo->moving); bo->moving = dma_fence_get - (dma_resv_get_excl(bo->base.resv)); + (dma_resv_excl_fence(bo->base.resv)); } return 0; diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index f32a3d176513..e3a7f740bb06 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -226,22 +226,20 @@ static inline void dma_resv_unlock(struct dma_resv *obj) } /** - * dma_resv_get_excl - get the reservation object's - * exclusive fence, with update-side lock held + * dma_resv_exclusive - return the object's exclusive fence * @obj: the reservation object * - * Returns the exclusive fence (if any). Does NOT take a - * reference. Writers must hold obj->lock, readers may only - * hold a RCU read side lock. + * Returns the exclusive fence (if any). Caller must either hold the objects + * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(), + * or one of the variants of each * * RETURNS * The exclusive fence or NULL */ static inline struct dma_fence * -dma_resv_get_excl(struct dma_resv *obj) +dma_resv_excl_fence(struct dma_resv *obj) { - return rcu_dereference_protected(obj->fence_excl, - dma_resv_held(obj)); + return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); } /** -- cgit v1.2.3 From fb5ce730f21434d8100942cf1dbe1acda255fbeb Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 11 May 2021 14:11:41 +0200 Subject: dma-buf: rename and cleanup dma_resv_get_list v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the comment needs to state explicitly that this is doesn't get a reference to the object then the function is named rather badly. Rename the function and use it in even more places. 
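As a rough sketch of the renamed helper in use (not taken from this patch; the function below is illustrative only), a caller holding the reservation lock can walk the shared fence list like this:

  #include <linux/dma-fence.h>
  #include <linux/dma-resv.h>

  /* Illustrative only: walk the shared fences of a locked reservation
   * object using the renamed dma_resv_shared_list() helper. */
  static void example_walk_shared_fences(struct dma_resv *resv)
  {
          struct dma_resv_list *list;
          unsigned int i;

          dma_resv_assert_held(resv);

          list = dma_resv_shared_list(resv);
          if (!list)
                  return;

          for (i = 0; i < list->shared_count; ++i) {
                  struct dma_fence *fence =
                          rcu_dereference_protected(list->shared[i],
                                                    dma_resv_held(resv));

                  if (dma_fence_is_signaled(fence))
                          continue;
                  /* e.g. collect or wait on @fence here */
          }
  }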
v2: use dma_resv_shared_list as new name Signed-off-by: Christian König Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210602111714.212426-5-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 32 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 4 +-- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- drivers/gpu/drm/qxl/qxl_debugfs.c | 2 +- drivers/gpu/drm/radeon/radeon_sync.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- include/linux/dma-resv.h | 27 ++++++++++---------- 13 files changed, 41 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index ed7b4e8f002f..62e7e055ac62 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -149,8 +149,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) dma_resv_assert_held(obj); - old = dma_resv_get_list(obj); - + old = dma_resv_shared_list(obj); if (old && old->shared_max) { if ((old->shared_count + num_fences) <= old->shared_max) return 0; @@ -219,12 +218,13 @@ EXPORT_SYMBOL(dma_resv_reserve_shared); */ void dma_resv_reset_shared_max(struct dma_resv *obj) { - /* Test shared fence slot reservation */ - if (rcu_access_pointer(obj->fence)) { - struct dma_resv_list *fence = dma_resv_get_list(obj); + struct dma_resv_list *fences = dma_resv_shared_list(obj); - fence->shared_max = fence->shared_count; - } + dma_resv_assert_held(obj); + + /* Test shared fence slot reservation */ + if (fences) + fences->shared_max = fences->shared_count; } EXPORT_SYMBOL(dma_resv_reset_shared_max); #endif @@ -247,7 +247,7 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) dma_resv_assert_held(obj); - fobj = dma_resv_get_list(obj); + fobj = dma_resv_shared_list(obj); count = fobj->shared_count; write_seqcount_begin(&obj->seq); @@ -290,7 +290,7 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_resv_assert_held(obj); - old = dma_resv_get_list(obj); + old = dma_resv_shared_list(obj); if (old) i = old->shared_count; @@ -329,7 +329,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) dma_resv_assert_held(dst); rcu_read_lock(); - src_list = rcu_dereference(src->fence); + src_list = dma_resv_shared_list(src); retry: if (src_list) { @@ -342,7 +342,7 @@ retry: return -ENOMEM; rcu_read_lock(); - src_list = rcu_dereference(src->fence); + src_list = dma_resv_shared_list(src); if (!src_list || src_list->shared_count > shared_count) { kfree(dst_list); goto retry; @@ -360,7 +360,7 @@ retry: if (!dma_fence_get_rcu(fence)) { dma_resv_list_free(dst_list); - src_list = rcu_dereference(src->fence); + src_list = dma_resv_shared_list(src); goto retry; } @@ -379,7 +379,7 @@ retry: new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock(); - src_list = dma_resv_get_list(dst); + src_list = dma_resv_shared_list(dst); old = dma_resv_excl_fence(dst); write_seqcount_begin(&dst->seq); @@ -432,7 +432,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj, if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock; - fobj = rcu_dereference(obj->fence); + fobj = dma_resv_shared_list(obj); if (fobj) sz += sizeof(*shared) * fobj->shared_max; @@ -538,7 +538,7 @@ retry: } if 
(wait_all) { - struct dma_resv_list *fobj = rcu_dereference(obj->fence); + struct dma_resv_list *fobj = dma_resv_shared_list(obj); if (fobj) shared_count = fobj->shared_count; @@ -623,7 +623,7 @@ retry: seq = read_seqcount_begin(&obj->seq); if (test_all) { - struct dma_resv_list *fobj = rcu_dereference(obj->fence); + struct dma_resv_list *fobj = dma_resv_shared_list(obj); unsigned int i; if (fobj) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d5e6519bdea1..65528592d5bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -247,7 +247,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, if (!ef) return -EINVAL; - old = dma_resv_get_list(resv); + old = dma_resv_shared_list(resv); if (!old) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 6dd0ea6e9e24..04caa31056d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -49,7 +49,7 @@ __dma_resv_make_exclusive(struct dma_resv *obj) unsigned int count; int r; - if (!dma_resv_get_list(obj)) /* no shared fences to convert */ + if (!dma_resv_shared_list(obj)) /* no shared fences to convert */ return 0; r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 2bdc9df5c6b9..1b2ceccaf5b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -213,7 +213,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, f = dma_resv_excl_fence(resv); r = amdgpu_sync_fence(sync, f); - flist = dma_resv_get_list(resv); + flist = dma_resv_shared_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index df1f185faae9..53a8ab8ce2a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1339,7 +1339,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_get_list(bo->base.resv); + flist = dma_resv_shared_list(bo->base.resv); if (flist) { for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 2237fe5204d0..8792d8dd5106 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -461,7 +461,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off, etnaviv_obj->vaddr, obj->size); rcu_read_lock(); - fobj = rcu_dereference(robj->fence); + fobj = dma_resv_shared_list(robj); if (fobj) { unsigned int i, shared_count = fobj->shared_count; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 088d375b3395..35279dd561f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -116,7 +116,7 @@ retry: args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); /* Translate shared fences to READ set of engines */ - list = rcu_dereference(obj->base.resv->fence); + list = dma_resv_shared_list(obj->base.resv); if (list) { unsigned int shared_count = 
list->shared_count, i; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a5a2a922e3e8..410a93a7e77f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -817,7 +817,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj, struct dma_fence *fence; int i, ret; - fobj = dma_resv_get_list(obj->resv); + fobj = dma_resv_shared_list(obj->resv); if (!fobj || (fobj->shared_count == 0)) { fence = dma_resv_excl_fence(obj->resv); /* don't need to wait on our own fences, since ring is fifo */ @@ -1025,7 +1025,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, } rcu_read_lock(); - fobj = rcu_dereference(robj->fence); + fobj = dma_resv_shared_list(robj); if (fobj) { unsigned int i, shared_count = fobj->shared_count; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 19c096de5bdc..6b43918035df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -355,7 +355,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e return ret; } - fobj = dma_resv_get_list(resv); + fobj = dma_resv_shared_list(resv); fence = dma_resv_excl_fence(resv); if (fence && (!exclusive || !fobj || !fobj->shared_count)) { diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 183d15e2cf58..1f9a59601bb1 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -61,7 +61,7 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) int rel; rcu_read_lock(); - fobj = rcu_dereference(bo->tbo.base.resv->fence); + fobj = dma_resv_shared_list(bo->tbo.base.resv); rel = fobj ? fobj->shared_count : 0; rcu_read_unlock(); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index c8a1711325de..9257b60144c4 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -105,7 +105,7 @@ int radeon_sync_resv(struct radeon_device *rdev, else if (f) r = dma_fence_wait(f, true); - flist = dma_resv_get_list(resv); + flist = dma_resv_shared_list(resv); if (shared || !flist || r) return r; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1752f8e523e7..f04a269b7065 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -261,7 +261,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) int i; rcu_read_lock(); - fobj = rcu_dereference(resv->fence); + fobj = dma_resv_shared_list(resv); fence = dma_resv_excl_fence(resv); if (fence && !fence->ops->signaled) dma_fence_enable_sw_signaling(fence); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index e3a7f740bb06..8dc19d65a217 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -78,20 +78,6 @@ struct dma_resv { #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) -/** - * dma_resv_get_list - get the reservation object's - * shared fence list, with update-side lock held - * @obj: the reservation object - * - * Returns the shared fence list. Does NOT take references to - * the fence. The obj->lock must be held. 
- */ -static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj) -{ - return rcu_dereference_protected(obj->fence, - dma_resv_held(obj)); -} - #ifdef CONFIG_DEBUG_MUTEXES void dma_resv_reset_shared_max(struct dma_resv *obj); #else @@ -268,6 +254,19 @@ dma_resv_get_excl_rcu(struct dma_resv *obj) return fence; } +/** + * dma_resv_shared_list - get the reservation object's shared fence list + * @obj: the reservation object + * + * Returns the shared fence list. Caller must either hold the objects + * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(), + * or one of the variants of each + */ +static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) +{ + return rcu_dereference_check(obj->fence, dma_resv_held(obj)); +} + void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); -- cgit v1.2.3 From 6b41323a265a02b7af906c6d6fd93f6cddd7ac12 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 2 Jun 2021 12:44:32 +0200 Subject: dma-buf: rename dma_resv_get_excl_rcu to _unlocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That describes much better what the function is doing here. Signed-off-by: Christian König Reviewed-by: Jason Ekstrand Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210602111714.212426-6-christian.koenig@amd.com --- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/drm_gem_atomic_helper.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- include/linux/dma-resv.h | 4 ++-- 11 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 9989425e9875..263b4fb03303 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1375,7 +1375,7 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array, if (!write) { struct dma_fence *fence = - dma_resv_get_excl_rcu(obj->resv); + dma_resv_get_excl_unlocked(obj->resv); return drm_gem_fence_array_add(fence_array, fence); } diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index a005c5a0ba46..a27135084ae5 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -147,7 +147,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0; obj = drm_gem_fb_get_obj(state->fb, 0); - fence = dma_resv_get_excl_rcu(obj->resv); + fence = dma_resv_get_excl_unlocked(obj->resv); drm_atomic_set_fence_for_plane(state, fence); return 0; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index d05c35994579..c942d2a8c252 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -195,7 +195,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) if (ret) return ret; } else { - bo->excl = dma_resv_get_excl_rcu(robj); + bo->excl = dma_resv_get_excl_unlocked(robj); } } diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c index 384ff0bb6e19..f17c5f54feb6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11040,7 +11040,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb; - fence = dma_resv_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_unlocked(obj->base.resv); if (fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 2ebd79537aea..7c0eb425cb3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -500,7 +500,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) struct dma_fence *fence; rcu_read_lock(); - fence = dma_resv_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_unlocked(obj->base.resv); rcu_read_unlock(); if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 4b9856d5ba14..c13aeddf5aa7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -73,7 +73,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, */ prune_fences = count && timeout >= 0; } else { - excl = dma_resv_get_excl_rcu(resv); + excl = dma_resv_get_excl_unlocked(resv); } if (excl && timeout >= 0) @@ -170,7 +170,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, kfree(shared); } else { - excl = dma_resv_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_unlocked(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index bec9c3652188..c85494f411f4 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1611,7 +1611,7 @@ i915_request_await_object(struct i915_request *to, dma_fence_put(shared[i]); kfree(shared); } else { - excl = dma_resv_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_unlocked(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2744558f3050..7aaf74552d06 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -606,7 +606,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, dma_fence_put(shared[i]); kfree(shared); } else { - excl = dma_resv_get_excl_rcu(resv); + excl = dma_resv_get_excl_unlocked(resv); } if (ret >= 0 && excl && excl->ops != exclude) { diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 0cb1f9d848d3..8d048bacd6f0 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -561,7 +561,7 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; } - asyw->state.fence = dma_resv_get_excl_rcu(nvbo->bo.base.resv); + asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv); asyw->image.offset[0] = nvbo->offset; if (wndw->func->prepare) { diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 6003cfeb1322..2df3e999a38d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -203,7 +203,7 @@ static void panfrost_acquire_object_fences(struct drm_gem_object **bos, int i; for (i = 
0; i < bo_count; i++) - implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv); + implicit_fences[i] = dma_resv_get_excl_unlocked(bos[i]->resv); } static void panfrost_attach_object_fences(struct drm_gem_object **bos, diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 8dc19d65a217..3e0eefcead44 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -229,7 +229,7 @@ dma_resv_excl_fence(struct dma_resv *obj) } /** - * dma_resv_get_excl_rcu - get the reservation object's + * dma_resv_get_excl_unlocked - get the reservation object's * exclusive fence, without lock held. * @obj: the reservation object * @@ -240,7 +240,7 @@ dma_resv_excl_fence(struct dma_resv *obj) * The exclusive fence or NULL if none */ static inline struct dma_fence * -dma_resv_get_excl_rcu(struct dma_resv *obj) +dma_resv_get_excl_unlocked(struct dma_resv *obj) { struct dma_fence *fence; -- cgit v1.2.3 From d3fae3b3daac09961ab871a25093b0ae404282d5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 2 Jun 2021 13:01:15 +0200 Subject: dma-buf: drop the _rcu postfix on function names v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions can be called both in _rcu context as well as while holding the lock. v2: add some kerneldoc as suggested by Daniel v3: fix indentation Signed-off-by: Christian König Reviewed-by: Jason Ekstrand Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210602111714.212426-7-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 3 +-- drivers/dma-buf/dma-resv.c | 32 ++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 11 ++++---- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++-- drivers/gpu/drm/drm_gem.c | 5 ++-- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 ++--- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 6 ++--- drivers/gpu/drm/i915/dma_resv_utils.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 4 +-- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 6 ++--- drivers/gpu/drm/i915/i915_request.c | 4 +-- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 3 +-- drivers/gpu/drm/nouveau/nouveau_gem.c | 4 +-- drivers/gpu/drm/panfrost/panfrost_drv.c | 3 +-- drivers/gpu/drm/radeon/radeon_gem.c | 6 ++--- drivers/gpu/drm/radeon/radeon_mn.c | 4 +-- drivers/gpu/drm/ttm/ttm_bo.c | 18 ++++++------- drivers/gpu/drm/vgem/vgem_fence.c | 3 +-- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 6 ++--- include/linux/dma-resv.h | 17 ++++-------- 31 files changed, 84 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index d419cf90ee73..511fe0d217a0 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -1147,8 +1147,7 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, long ret; /* Wait on any implicit rendering fences */ - ret = dma_resv_wait_timeout_rcu(resv, write, true, - MAX_SCHEDULE_TIMEOUT); + ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT); if (ret < 
0) return ret; diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 62e7e055ac62..f26c71747d43 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -396,7 +396,7 @@ retry: EXPORT_SYMBOL(dma_resv_copy_fences); /** - * dma_resv_get_fences_rcu - Get an object's shared and exclusive + * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object * @pfence_excl: the returned exclusive fence (or NULL) @@ -408,10 +408,9 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * exclusive fence is not specified the fence is put into the array of the * shared fences as well. Returns either zero or -ENOMEM. */ -int dma_resv_get_fences_rcu(struct dma_resv *obj, - struct dma_fence **pfence_excl, - unsigned int *pshared_count, - struct dma_fence ***pshared) +int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, + unsigned int *pshared_count, + struct dma_fence ***pshared) { struct dma_fence **shared = NULL; struct dma_fence *fence_excl; @@ -494,23 +493,24 @@ unlock: *pshared = shared; return ret; } -EXPORT_SYMBOL_GPL(dma_resv_get_fences_rcu); +EXPORT_SYMBOL_GPL(dma_resv_get_fences); /** - * dma_resv_wait_timeout_rcu - Wait on reservation's objects + * dma_resv_wait_timeout - Wait on reservation's objects * shared and/or exclusive fences. * @obj: the reservation object * @wait_all: if true, wait on all fences, else wait on just exclusive fence * @intr: if true, do interruptible wait * @timeout: timeout value in jiffies or zero to return immediately * + * Callers are not required to hold specific locks, but maybe hold + * dma_resv_lock() already * RETURNS * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than zer on success. */ -long dma_resv_wait_timeout_rcu(struct dma_resv *obj, - bool wait_all, bool intr, - unsigned long timeout) +long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, + unsigned long timeout) { long ret = timeout ? timeout : 1; unsigned int seq, shared_count; @@ -582,7 +582,7 @@ unlock_retry: rcu_read_unlock(); goto retry; } -EXPORT_SYMBOL_GPL(dma_resv_wait_timeout_rcu); +EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) @@ -602,16 +602,18 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) } /** - * dma_resv_test_signaled_rcu - Test if a reservation object's - * fences have been signaled. + * dma_resv_test_signaled - Test if a reservation object's fences have been + * signaled. 
* @obj: the reservation object * @test_all: if true, test all fences, otherwise only test the exclusive * fence * + * Callers are not required to hold specific locks, but maybe hold + * dma_resv_lock() already * RETURNS * true if all fences signaled, else false */ -bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all) +bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) { unsigned int seq, shared_count; int ret; @@ -660,7 +662,7 @@ retry: rcu_read_unlock(); return ret; } -EXPORT_SYMBOL_GPL(dma_resv_test_signaled_rcu); +EXPORT_SYMBOL_GPL(dma_resv_test_signaled); #if IS_ENABLED(CONFIG_LOCKDEP) static int __init dma_resv_lockdep(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 49f73b5b89b0..ac7b37dfff5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -203,9 +203,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl, - &work->shared_count, - &work->shared); + r = dma_resv_get_fences(new_abo->tbo.base.resv, &work->excl, + &work->shared_count, &work->shared); if (unlikely(r != 0)) { DRM_ERROR("failed to get fences for buffer\n"); goto unpin; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 04caa31056d0..c3053b83b80c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -52,7 +52,7 @@ __dma_resv_make_exclusive(struct dma_resv *obj) if (!dma_resv_shared_list(obj)) /* no shared fences to convert */ return 0; - r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences); + r = dma_resv_get_fences(obj, NULL, &count, &fences); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7d5aaf584634..1c3e3b608332 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -526,8 +526,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true, - timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b4971e90b98c..df69b1e9e451 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -112,7 +112,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, unsigned count; int r; - r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences); + r = dma_resv_get_fences(resv, NULL, &count, &fences); if (r) goto fallback; @@ -156,8 +156,7 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. 
*/ - dma_resv_wait_timeout_rcu(resv, true, false, - MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 2741c28ff1b5..d6c54c7f7679 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 19c1384a133f..96447e1d4c9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -756,8 +756,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, + MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 82f0542c7792..a692a4570627 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1126,9 +1126,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, - true, false, - msecs_to_jiffies(10)); + r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; if (r < 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bcfd4a8d0288..d1a229212e7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2022,13 +2022,12 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) unsigned i, shared_count; int r; - r = dma_resv_get_fences_rcu(resv, &excl, - &shared_count, &shared); + r = dma_resv_get_fences(resv, &excl, &shared_count, &shared); if (r) { /* Not enough memory to grab the fence list, as last resort * block for all the fences to complete. 
*/ - dma_resv_wait_timeout_rcu(resv, true, false, + dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); return; } @@ -2640,7 +2639,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) return false; /* Try to block ongoing updates */ @@ -2820,8 +2819,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, - true, true, timeout); + timeout = dma_resv_wait_timeout(vm->root.base.bo->tbo.base.resv, true, + true, timeout); if (timeout <= 0) return timeout; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3267eb2e35dd..6dde2873d47b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8400,9 +8400,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * deadlock during GPU reset when this fence will not signal * but we hold reservation lock for the BO. */ - r = dma_resv_wait_timeout_rcu(abo->tbo.base.resv, true, - false, - msecs_to_jiffies(5000)); + r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false, + msecs_to_jiffies(5000)); if (unlikely(r <= 0)) DRM_ERROR("Waiting for fences timed out!"); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 263b4fb03303..d62fb1a3c916 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -770,8 +770,7 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, return -EINVAL; } - ret = dma_resv_wait_timeout_rcu(obj->resv, wait_all, - true, timeout); + ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout); if (ret == 0) ret = -ETIME; else if (ret > 0) @@ -1380,7 +1379,7 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array, return drm_gem_fence_array_add(fence_array, fence); } - ret = dma_resv_get_fences_rcu(obj->resv, NULL, + ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences); if (ret || !fence_count) return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 8792d8dd5106..b8fa6ed3dd73 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -390,14 +390,12 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op, } if (op & ETNA_PREP_NOSYNC) { - if (!dma_resv_test_signaled_rcu(obj->resv, - write)) + if (!dma_resv_test_signaled(obj->resv, write)) return -EBUSY; } else { unsigned long remain = etnaviv_timeout_to_jiffies(timeout); - ret = dma_resv_wait_timeout_rcu(obj->resv, - write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, write, true, remain); if (ret <= 0) return ret == 0 ? 
-ETIMEDOUT : ret; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index c942d2a8c252..d53856d7a747 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -189,9 +189,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) continue; if (bo->flags & ETNA_SUBMIT_BO_WRITE) { - ret = dma_resv_get_fences_rcu(robj, &bo->excl, - &bo->nr_shared, - &bo->shared); + ret = dma_resv_get_fences(robj, &bo->excl, + &bo->nr_shared, + &bo->shared); if (ret) return ret; } else { diff --git a/drivers/gpu/drm/i915/dma_resv_utils.c b/drivers/gpu/drm/i915/dma_resv_utils.c index 9e508e7d4629..7df91b7e4ca8 100644 --- a/drivers/gpu/drm/i915/dma_resv_utils.c +++ b/drivers/gpu/drm/i915/dma_resv_utils.c @@ -10,7 +10,7 @@ void dma_resv_prune(struct dma_resv *resv) { if (dma_resv_trylock(resv)) { - if (dma_resv_test_signaled_rcu(resv, true)) + if (dma_resv_test_signaled(resv, true)) dma_resv_add_excl_fence(resv, NULL); dma_resv_unlock(resv); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 35279dd561f5..6234e17259c1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !dma_resv_test_signaled_rcu(obj->resv, true); + * !dma_resv_test_signaled(obj->resv, true); * to report the overall busyness. This is what the wait-ioctl does. * */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 297143511f99..66789111a24b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1481,7 +1481,7 @@ static inline bool use_reloc_gpu(struct i915_vma *vma) if (DBG_FORCE_RELOC) return false; - return !dma_resv_test_signaled_rcu(vma->resv, true); + return !dma_resv_test_signaled(vma->resv, true); } static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index a657b99ec760..b5cbbe659a77 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -85,8 +85,8 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true; /* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout_rcu(obj->base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(obj->base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index c13aeddf5aa7..1e97520c62b2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -45,7 +45,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); + ret = dma_resv_get_fences(resv, &excl, &count, &shared); if (ret) return ret; @@ -158,8 +158,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(obj->base.resv, - &excl, &count, &shared); + ret = dma_resv_get_fences(obj->base.resv, &excl, &count, 
+ &shared); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c85494f411f4..6cb91f042642 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1594,8 +1594,8 @@ i915_request_await_object(struct i915_request *to, struct dma_fence **shared; unsigned int count, i; - ret = dma_resv_get_fences_rcu(obj->base.resv, - &excl, &count, &shared); + ret = dma_resv_get_fences(obj->base.resv, &excl, &count, + &shared); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7aaf74552d06..c589a681da77 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -582,7 +582,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, struct dma_fence **shared; unsigned int count, i; - ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); + ret = dma_resv_get_fences(resv, &excl, &count, &shared); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 410a93a7e77f..a94a43de95ef 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -915,8 +915,7 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout); long ret; - ret = dma_resv_wait_timeout_rcu(obj->resv, write, - true, remain); + ret = dma_resv_wait_timeout(obj->resv, write, true, remain); if (ret == 0) return remain == 0 ? -EBUSY : -ETIMEDOUT; else if (ret < 0) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index d863e5ed954a..5b27845075a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -964,8 +964,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, return -ENOENT; nvbo = nouveau_gem_object(gem); - lret = dma_resv_wait_timeout_rcu(nvbo->bo.base.resv, write, true, - no_wait ? 0 : 30 * HZ); + lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true, + no_wait ? 0 : 30 * HZ); if (!lret) ret = -EBUSY; else if (lret > 0) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 1596559f3d14..075ec0ef746c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -312,8 +312,7 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, if (!gem_obj) return -ENOENT; - ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true, - true, timeout); + ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout); if (!ret) ret = timeout ? 
-ETIMEDOUT : -EBUSY; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 3272c33af8fe..458f92a70887 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -161,7 +161,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, } if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ - r = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true, 30 * HZ); + r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); if (!r) r = -EBUSY; @@ -523,7 +523,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); - r = dma_resv_test_signaled_rcu(robj->tbo.base.resv, true); + r = dma_resv_test_signaled(robj->tbo.base.resv, true); if (r == 0) r = -EBUSY; else @@ -552,7 +552,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); - ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true, 30 * HZ); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); if (ret == 0) r = -EBUSY; else if (ret < 0) diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index e37c9a57a7c3..9fa88549c89e 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, return true; } - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f04a269b7065..7e7284da5630 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -296,7 +296,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, struct dma_resv *resv = &bo->base._resv; int ret; - if (dma_resv_test_signaled_rcu(resv, true)) + if (dma_resv_test_signaled(resv, true)) ret = 0; else ret = -EBUSY; @@ -308,8 +308,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, dma_resv_unlock(bo->base.resv); spin_unlock(&bo->bdev->lru_lock); - lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, - 30 * HZ); + lret = dma_resv_wait_timeout(resv, true, interruptible, + 30 * HZ); if (lret < 0) return lret; @@ -411,8 +411,8 @@ static void ttm_bo_release(struct kref *kref) /* Last resort, if we fail to allocate memory for the * fences block for the BO to become idle */ - dma_resv_wait_timeout_rcu(bo->base.resv, true, false, - 30 * HZ); + dma_resv_wait_timeout(bo->base.resv, true, false, + 30 * HZ); } if (bo->bdev->funcs->release_notify) @@ -422,7 +422,7 @@ static void ttm_bo_release(struct kref *kref) ttm_mem_io_free(bdev, bo->resource); } - if (!dma_resv_test_signaled_rcu(bo->base.resv, true) || + if (!dma_resv_test_signaled(bo->base.resv, true) || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); @@ -1094,14 +1094,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, long timeout = 15 * HZ; if (no_wait) { - if (dma_resv_test_signaled_rcu(bo->base.resv, true)) + if (dma_resv_test_signaled(bo->base.resv, true)) return 0; else return -EBUSY; } - timeout = dma_resv_wait_timeout_rcu(bo->base.resv, true, - interruptible, timeout); + timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible, + timeout); if (timeout < 0) return 
timeout; diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 2902dc6e64fa..bd6f75285fd9 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -151,8 +151,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, /* Check for a conflicting fence */ resv = obj->resv; - if (!dma_resv_test_signaled_rcu(resv, - arg->flags & VGEM_FENCE_WRITE)) { + if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) { ret = -EBUSY; goto err_fence; } diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 669f2ee39515..5c1ad1596889 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -451,10 +451,9 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data, return -ENOENT; if (args->flags & VIRTGPU_WAIT_NOWAIT) { - ret = dma_resv_test_signaled_rcu(obj->resv, true); + ret = dma_resv_test_signaled(obj->resv, true); } else { - ret = dma_resv_wait_timeout_rcu(obj->resv, true, true, - timeout); + ret = dma_resv_wait_timeout(obj->resv, true, true, timeout); } if (ret == 0) ret = -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 176b6201ef2b..362f56d5b12b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -743,9 +743,9 @@ static int vmw_user_bo_synccpu_grab(struct vmw_user_buffer_object *user_bo, if (flags & drm_vmw_synccpu_allow_cs) { long lret; - lret = dma_resv_wait_timeout_rcu - (bo->base.resv, true, true, - nonblock ? 0 : MAX_SCHEDULE_TIMEOUT); + lret = dma_resv_wait_timeout(bo->base.resv, true, true, + nonblock ? 0 : + MAX_SCHEDULE_TIMEOUT); if (!lret) return -EBUSY; else if (lret < 0) diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 3e0eefcead44..562b885cf9c3 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -271,19 +271,12 @@ void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); - void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); - -int dma_resv_get_fences_rcu(struct dma_resv *obj, - struct dma_fence **pfence_excl, - unsigned *pshared_count, - struct dma_fence ***pshared); - +int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, + unsigned *pshared_count, struct dma_fence ***pshared); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); - -long dma_resv_wait_timeout_rcu(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout); - -bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all); +long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, + unsigned long timeout); +bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all); #endif /* _LINUX_RESERVATION_H */ -- cgit v1.2.3 From 3bf3710e3718a5aebdf465343bc1125b6e8cca96 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 2 Jun 2021 10:38:10 +0200 Subject: drm/ttm: Add a generic TTM memcpy move for page-based iomem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal ttm_bo_util memcpy uses ioremap functionality, and while it probably might be possible to use it for copying in- and out of sglist represented io memory, using io_mem_reserve() / io_mem_free() callbacks, that would cause 
problems with fault(). Instead, implement a method mapping page-by-page using kmap_local() semantics. As an additional benefit we then avoid the occasional global TLB flushes of ioremap() and consuming ioremap space, elimination of a critical point of failure and with a slight change of semantics we could also push the memcpy out async for testing and async driver development purposes. A special linear iomem iterator is introduced internally to mimic the old ioremap behaviour for code-paths that can't immediately be ported over. This adds to the code size and should be considered a temporary solution. Looking at the code we have a lot of checks for iomap tagged pointers. Ideally we should extend the core memremap functions to also accept uncached memory and kmap_local functionality. Then we could strip a lot of code. Cc: Christian König Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20210602083818.241793-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo_util.c | 279 +++++++++++++------------------------ drivers/gpu/drm/ttm/ttm_module.c | 35 +++++ drivers/gpu/drm/ttm/ttm_resource.c | 193 +++++++++++++++++++++++++ drivers/gpu/drm/ttm/ttm_tt.c | 45 ++++++ include/drm/ttm/ttm_bo_driver.h | 28 ++++ include/drm/ttm/ttm_caching.h | 2 + include/drm/ttm/ttm_kmap_iter.h | 61 ++++++++ include/drm/ttm/ttm_resource.h | 61 ++++++++ include/drm/ttm/ttm_tt.h | 16 +++ 9 files changed, 539 insertions(+), 181 deletions(-) create mode 100644 include/drm/ttm/ttm_kmap_iter.h (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1b326e70cb02..0d1e417e62d6 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -72,188 +72,125 @@ void ttm_mem_io_free(struct ttm_device *bdev, mem->bus.addr = NULL; } -static int ttm_resource_ioremap(struct ttm_device *bdev, - struct ttm_resource *mem, - void **virtual) +/** + * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. + * @bo: The struct ttm_buffer_object. + * @new_mem: The struct ttm_resource we're moving to (copy destination). + * @new_iter: A struct ttm_kmap_iter representing the destination resource. + * @src_iter: A struct ttm_kmap_iter representing the source resource. + * + * This function is intended to be able to move out async under a + * dma-fence if desired. + */ +void ttm_move_memcpy(struct ttm_buffer_object *bo, + u32 num_pages, + struct ttm_kmap_iter *dst_iter, + struct ttm_kmap_iter *src_iter) { - int ret; - void *addr; - - *virtual = NULL; - ret = ttm_mem_io_reserve(bdev, mem); - if (ret || !mem->bus.is_iomem) - return ret; + const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; + const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; + struct ttm_tt *ttm = bo->ttm; + struct dma_buf_map src_map, dst_map; + pgoff_t i; - if (mem->bus.addr) { - addr = mem->bus.addr; - } else { - size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; + /* Single TTM move. NOP */ + if (dst_ops->maps_tt && src_ops->maps_tt) + return; - if (mem->bus.caching == ttm_write_combined) - addr = ioremap_wc(mem->bus.offset, bus_size); -#ifdef CONFIG_X86 - else if (mem->bus.caching == ttm_cached) - addr = ioremap_cache(mem->bus.offset, bus_size); -#endif - else - addr = ioremap(mem->bus.offset, bus_size); - if (!addr) { - ttm_mem_io_free(bdev, mem); - return -ENOMEM; + /* Don't move nonexistent data. Clear destination instead. 
*/ + if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) { + if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + return; + + for (i = 0; i < num_pages; ++i) { + dst_ops->map_local(dst_iter, &dst_map, i); + if (dst_map.is_iomem) + memset_io(dst_map.vaddr_iomem, 0, PAGE_SIZE); + else + memset(dst_map.vaddr, 0, PAGE_SIZE); + if (dst_ops->unmap_local) + dst_ops->unmap_local(dst_iter, &dst_map); } + return; } - *virtual = addr; - return 0; -} - -static void ttm_resource_iounmap(struct ttm_device *bdev, - struct ttm_resource *mem, - void *virtual) -{ - if (virtual && mem->bus.addr == NULL) - iounmap(virtual); - ttm_mem_io_free(bdev, mem); -} - -static int ttm_copy_io_page(void *dst, void *src, unsigned long page) -{ - uint32_t *dstP = - (uint32_t *) ((unsigned long)dst + (page << PAGE_SHIFT)); - uint32_t *srcP = - (uint32_t *) ((unsigned long)src + (page << PAGE_SHIFT)); - - int i; - for (i = 0; i < PAGE_SIZE / sizeof(uint32_t); ++i) - iowrite32(ioread32(srcP++), dstP++); - return 0; -} - -static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, - unsigned long page, - pgprot_t prot) -{ - struct page *d = ttm->pages[page]; - void *dst; - - if (!d) - return -ENOMEM; - - src = (void *)((unsigned long)src + (page << PAGE_SHIFT)); - dst = kmap_atomic_prot(d, prot); - if (!dst) - return -ENOMEM; - - memcpy_fromio(dst, src, PAGE_SIZE); - - kunmap_atomic(dst); - - return 0; -} - -static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, - unsigned long page, - pgprot_t prot) -{ - struct page *s = ttm->pages[page]; - void *src; - - if (!s) - return -ENOMEM; - - dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT)); - src = kmap_atomic_prot(s, prot); - if (!src) - return -ENOMEM; - memcpy_toio(dst, src, PAGE_SIZE); - - kunmap_atomic(src); + for (i = 0; i < num_pages; ++i) { + dst_ops->map_local(dst_iter, &dst_map, i); + src_ops->map_local(src_iter, &src_map, i); + + if (!src_map.is_iomem && !dst_map.is_iomem) { + memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE); + } else if (!src_map.is_iomem) { + dma_buf_map_memcpy_to(&dst_map, src_map.vaddr, + PAGE_SIZE); + } else if (!dst_map.is_iomem) { + memcpy_fromio(dst_map.vaddr, src_map.vaddr_iomem, + PAGE_SIZE); + } else { + int j; + u32 __iomem *src = src_map.vaddr_iomem; + u32 __iomem *dst = dst_map.vaddr_iomem; - return 0; + for (j = 0; j < (PAGE_SIZE / sizeof(u32)); ++j) + iowrite32(ioread32(src++), dst++); + } + if (src_ops->unmap_local) + src_ops->unmap_local(src_iter, &src_map); + if (dst_ops->unmap_local) + dst_ops->unmap_local(dst_iter, &dst_map); + } } +EXPORT_SYMBOL(ttm_move_memcpy); int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, - struct ttm_resource *new_mem) + struct ttm_resource *dst_mem) { - struct ttm_resource *old_mem = bo->resource; struct ttm_device *bdev = bo->bdev; - struct ttm_resource_manager *man; + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); struct ttm_tt *ttm = bo->ttm; - void *old_iomap; - void *new_iomap; - int ret; - unsigned long i; - - man = ttm_manager_type(bdev, new_mem->mem_type); - - ret = ttm_bo_wait_ctx(bo, ctx); - if (ret) - return ret; - - ret = ttm_resource_ioremap(bdev, old_mem, &old_iomap); - if (ret) - return ret; - ret = ttm_resource_ioremap(bdev, new_mem, &new_iomap); - if (ret) - goto out; - - /* - * Single TTM move. NOP. - */ - if (old_iomap == NULL && new_iomap == NULL) - goto out1; - - /* - * Don't move nonexistent data. Clear destination instead. 
- */ - if (old_iomap == NULL && - (ttm == NULL || (!ttm_tt_is_populated(ttm) && - !(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)))) { - memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE); - goto out1; - } + struct ttm_resource *src_mem = bo->resource; + struct ttm_resource_manager *src_man = + ttm_manager_type(bdev, src_mem->mem_type); + struct ttm_resource src_copy = *src_mem; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_linear_io io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + int ret = 0; - /* - * TTM might be null for moves within the same region. - */ - if (ttm) { + if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) || + dst_man->use_tt)) { ret = ttm_tt_populate(bdev, ttm, ctx); if (ret) - goto out1; + return ret; } - for (i = 0; i < new_mem->num_pages; ++i) { - if (old_iomap == NULL) { - pgprot_t prot = ttm_io_prot(bo, old_mem, PAGE_KERNEL); - ret = ttm_copy_ttm_io_page(ttm, new_iomap, i, - prot); - } else if (new_iomap == NULL) { - pgprot_t prot = ttm_io_prot(bo, new_mem, PAGE_KERNEL); - ret = ttm_copy_io_ttm_page(ttm, old_iomap, i, - prot); - } else { - ret = ttm_copy_io_page(new_iomap, old_iomap, i); - } - if (ret) - break; - } - mb(); -out1: - ttm_resource_iounmap(bdev, new_mem, new_iomap); -out: - ttm_resource_iounmap(bdev, old_mem, old_iomap); - - if (ret) { - ttm_resource_free(bo, &new_mem); - return ret; + dst_iter = ttm_kmap_iter_linear_io_init(&_dst_iter.io, bdev, dst_mem); + if (PTR_ERR(dst_iter) == -EINVAL && dst_man->use_tt) + dst_iter = ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm); + if (IS_ERR(dst_iter)) + return PTR_ERR(dst_iter); + + src_iter = ttm_kmap_iter_linear_io_init(&_src_iter.io, bdev, src_mem); + if (PTR_ERR(src_iter) == -EINVAL && src_man->use_tt) + src_iter = ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm); + if (IS_ERR(src_iter)) { + ret = PTR_ERR(src_iter); + goto out_src_iter; } - ttm_resource_free(bo, &bo->resource); - ttm_bo_assign_mem(bo, new_mem); + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + src_copy = *src_mem; + ttm_bo_move_sync_cleanup(bo, dst_mem); - if (!man->use_tt) - ttm_bo_tt_destroy(bo); + if (!src_iter->ops->maps_tt) + ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy); +out_src_iter: + if (!dst_iter->ops->maps_tt) + ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem); return ret; } @@ -335,27 +272,7 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res, man = ttm_manager_type(bo->bdev, res->mem_type); caching = man->use_tt ? 
bo->ttm->caching : res->bus.caching; - /* Cached mappings need no adjustment */ - if (caching == ttm_cached) - return tmp; - -#if defined(__i386__) || defined(__x86_64__) - if (caching == ttm_write_combined) - tmp = pgprot_writecombine(tmp); - else if (boot_cpu_data.x86 > 3) - tmp = pgprot_noncached(tmp); -#endif -#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \ - defined(__powerpc__) || defined(__mips__) - if (caching == ttm_write_combined) - tmp = pgprot_writecombine(tmp); - else - tmp = pgprot_noncached(tmp); -#endif -#if defined(__sparc__) - tmp = pgprot_noncached(tmp); -#endif - return tmp; + return ttm_prot_from_caching(caching, tmp); } EXPORT_SYMBOL(ttm_io_prot); diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 56b0efdba1a9..997c458f68a9 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -31,12 +31,47 @@ */ #include #include +#include #include #include #include +#include #include "ttm_module.h" +/** + * ttm_prot_from_caching - Modify the page protection according to the + * ttm cacing mode + * @caching: The ttm caching mode + * @tmp: The original page protection + * + * Return: The modified page protection + */ +pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) +{ + /* Cached mappings need no adjustment */ + if (caching == ttm_cached) + return tmp; + +#if defined(__i386__) || defined(__x86_64__) + if (caching == ttm_write_combined) + tmp = pgprot_writecombine(tmp); + else if (boot_cpu_data.x86 > 3) + tmp = pgprot_noncached(tmp); +#endif +#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \ + defined(__powerpc__) || defined(__mips__) + if (caching == ttm_write_combined) + tmp = pgprot_writecombine(tmp); + else + tmp = pgprot_noncached(tmp); +#endif +#if defined(__sparc__) + tmp = pgprot_noncached(tmp); +#endif + return tmp; +} + struct dentry *ttm_debugfs_root; static int __init ttm_init(void) diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 2a68145572cc..2431717376e7 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -22,6 +22,10 @@ * Authors: Christian König */ +#include +#include +#include + #include #include @@ -154,3 +158,192 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man, man->func->debug(man, p); } EXPORT_SYMBOL(ttm_resource_manager_debug); + +static void ttm_kmap_iter_iomap_map_local(struct ttm_kmap_iter *iter, + struct dma_buf_map *dmap, + pgoff_t i) +{ + struct ttm_kmap_iter_iomap *iter_io = + container_of(iter, typeof(*iter_io), base); + void __iomem *addr; + +retry: + while (i >= iter_io->cache.end) { + iter_io->cache.sg = iter_io->cache.sg ? 
+ sg_next(iter_io->cache.sg) : iter_io->st->sgl; + iter_io->cache.i = iter_io->cache.end; + iter_io->cache.end += sg_dma_len(iter_io->cache.sg) >> + PAGE_SHIFT; + iter_io->cache.offs = sg_dma_address(iter_io->cache.sg) - + iter_io->start; + } + + if (i < iter_io->cache.i) { + iter_io->cache.end = 0; + iter_io->cache.sg = NULL; + goto retry; + } + + addr = io_mapping_map_local_wc(iter_io->iomap, iter_io->cache.offs + + (((resource_size_t)i - iter_io->cache.i) + << PAGE_SHIFT)); + dma_buf_map_set_vaddr_iomem(dmap, addr); +} + +static void ttm_kmap_iter_iomap_unmap_local(struct ttm_kmap_iter *iter, + struct dma_buf_map *map) +{ + io_mapping_unmap_local(map->vaddr_iomem); +} + +static const struct ttm_kmap_iter_ops ttm_kmap_iter_io_ops = { + .map_local = ttm_kmap_iter_iomap_map_local, + .unmap_local = ttm_kmap_iter_iomap_unmap_local, + .maps_tt = false, +}; + +/** + * ttm_kmap_iter_iomap_init - Initialize a struct ttm_kmap_iter_iomap + * @iter_io: The struct ttm_kmap_iter_iomap to initialize. + * @iomap: The struct io_mapping representing the underlying linear io_memory. + * @st: sg_table into @iomap, representing the memory of the struct + * ttm_resource. + * @start: Offset that needs to be subtracted from @st to make + * sg_dma_address(st->sgl) - @start == 0 for @iomap start. + * + * Return: Pointer to the embedded struct ttm_kmap_iter. + */ +struct ttm_kmap_iter * +ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, + struct io_mapping *iomap, + struct sg_table *st, + resource_size_t start) +{ + iter_io->base.ops = &ttm_kmap_iter_io_ops; + iter_io->iomap = iomap; + iter_io->st = st; + iter_io->start = start; + memset(&iter_io->cache, 0, sizeof(iter_io->cache)); + + return &iter_io->base; +} +EXPORT_SYMBOL(ttm_kmap_iter_iomap_init); + +/** + * DOC: Linear io iterator + * + * This code should die in the not too near future. Best would be if we could + * make io-mapping use memremap for all io memory, and have memremap + * implement a kmap_local functionality. We could then strip a huge amount of + * code. These linear io iterators are implemented to mimic old functionality, + * and they don't use kmap_local semantics at all internally. Rather ioremap or + * friends, and at least on 32-bit they add global TLB flushes and points + * of failure. + */ + +static void ttm_kmap_iter_linear_io_map_local(struct ttm_kmap_iter *iter, + struct dma_buf_map *dmap, + pgoff_t i) +{ + struct ttm_kmap_iter_linear_io *iter_io = + container_of(iter, typeof(*iter_io), base); + + *dmap = iter_io->dmap; + dma_buf_map_incr(dmap, i * PAGE_SIZE); +} + +static const struct ttm_kmap_iter_ops ttm_kmap_iter_linear_io_ops = { + .map_local = ttm_kmap_iter_linear_io_map_local, + .maps_tt = false, +}; + +/** + * ttm_kmap_iter_linear_io_init - Initialize an iterator for linear io memory + * @iter_io: The iterator to initialize + * @bdev: The TTM device + * @mem: The ttm resource representing the iomap. + * + * This function is for internal TTM use only. It sets up a memcpy kmap iterator + * pointing at a linear chunk of io memory. + * + * Return: A pointer to the embedded struct ttm_kmap_iter or error pointer on + * failure. 
+ */ +struct ttm_kmap_iter * +ttm_kmap_iter_linear_io_init(struct ttm_kmap_iter_linear_io *iter_io, + struct ttm_device *bdev, + struct ttm_resource *mem) +{ + int ret; + + ret = ttm_mem_io_reserve(bdev, mem); + if (ret) + goto out_err; + if (!mem->bus.is_iomem) { + ret = -EINVAL; + goto out_io_free; + } + + if (mem->bus.addr) { + dma_buf_map_set_vaddr(&iter_io->dmap, mem->bus.addr); + iter_io->needs_unmap = false; + } else { + size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; + + iter_io->needs_unmap = true; + memset(&iter_io->dmap, 0, sizeof(iter_io->dmap)); + if (mem->bus.caching == ttm_write_combined) + dma_buf_map_set_vaddr_iomem(&iter_io->dmap, + ioremap_wc(mem->bus.offset, + bus_size)); + else if (mem->bus.caching == ttm_cached) + dma_buf_map_set_vaddr(&iter_io->dmap, + memremap(mem->bus.offset, bus_size, + MEMREMAP_WB | + MEMREMAP_WT | + MEMREMAP_WC)); + + /* If uncached requested or if mapping cached or wc failed */ + if (dma_buf_map_is_null(&iter_io->dmap)) + dma_buf_map_set_vaddr_iomem(&iter_io->dmap, + ioremap(mem->bus.offset, + bus_size)); + + if (dma_buf_map_is_null(&iter_io->dmap)) { + ret = -ENOMEM; + goto out_io_free; + } + } + + iter_io->base.ops = &ttm_kmap_iter_linear_io_ops; + return &iter_io->base; + +out_io_free: + ttm_mem_io_free(bdev, mem); +out_err: + return ERR_PTR(ret); +} + +/** + * ttm_kmap_iter_linear_io_fini - Clean up an iterator for linear io memory + * @iter_io: The iterator to initialize + * @bdev: The TTM device + * @mem: The ttm resource representing the iomap. + * + * This function is for internal TTM use only. It cleans up a memcpy kmap + * iterator initialized by ttm_kmap_iter_linear_io_init. + */ +void +ttm_kmap_iter_linear_io_fini(struct ttm_kmap_iter_linear_io *iter_io, + struct ttm_device *bdev, + struct ttm_resource *mem) +{ + if (iter_io->needs_unmap && dma_buf_map_is_set(&iter_io->dmap)) { + if (iter_io->dmap.is_iomem) + iounmap(iter_io->dmap.vaddr_iomem); + else + memunmap(iter_io->dmap.vaddr); + } + + ttm_mem_io_free(bdev, mem); +} diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 682c840c9a51..24031a8acd2d 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -433,3 +433,48 @@ void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages) if (!ttm_dma32_pages_limit) ttm_dma32_pages_limit = num_dma32_pages; } + +static void ttm_kmap_iter_tt_map_local(struct ttm_kmap_iter *iter, + struct dma_buf_map *dmap, + pgoff_t i) +{ + struct ttm_kmap_iter_tt *iter_tt = + container_of(iter, typeof(*iter_tt), base); + + dma_buf_map_set_vaddr(dmap, kmap_local_page_prot(iter_tt->tt->pages[i], + iter_tt->prot)); +} + +static void ttm_kmap_iter_tt_unmap_local(struct ttm_kmap_iter *iter, + struct dma_buf_map *map) +{ + kunmap_local(map->vaddr); +} + +static const struct ttm_kmap_iter_ops ttm_kmap_iter_tt_ops = { + .map_local = ttm_kmap_iter_tt_map_local, + .unmap_local = ttm_kmap_iter_tt_unmap_local, + .maps_tt = true, +}; + +/** + * ttm_kmap_iter_tt_init - Initialize a struct ttm_kmap_iter_tt + * @iter_tt: The struct ttm_kmap_iter_tt to initialize. + * @tt: Struct ttm_tt holding page pointers of the struct ttm_resource. + * + * Return: Pointer to the embedded struct ttm_kmap_iter. 
+ */ +struct ttm_kmap_iter * +ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt, + struct ttm_tt *tt) +{ + iter_tt->base.ops = &ttm_kmap_iter_tt_ops; + iter_tt->tt = tt; + if (tt) + iter_tt->prot = ttm_prot_from_caching(tt->caching, PAGE_KERNEL); + else + iter_tt->prot = PAGE_KERNEL; + + return &iter_tt->base; +} +EXPORT_SYMBOL(ttm_kmap_iter_tt_init); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index b266971c1974..68d6069572aa 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -40,6 +40,7 @@ #include #include "ttm_bo_api.h" +#include "ttm_kmap_iter.h" #include "ttm_placement.h" #include "ttm_tt.h" #include "ttm_pool.h" @@ -270,6 +271,23 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, bool pipeline, struct ttm_resource *new_mem); +/** + * ttm_bo_move_accel_cleanup. + * + * @bo: A pointer to a struct ttm_buffer_object. + * @new_mem: struct ttm_resource indicating where to move. + * + * Special case of ttm_bo_move_accel_cleanup where the bo is guaranteed + * by the caller to be idle. Typically used after memcpy buffer moves. + */ +static inline void ttm_bo_move_sync_cleanup(struct ttm_buffer_object *bo, + struct ttm_resource *new_mem) +{ + int ret = ttm_bo_move_accel_cleanup(bo, NULL, true, false, new_mem); + + WARN_ON(ret); +} + /** * ttm_bo_pipeline_gutting. * @@ -304,4 +322,14 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); +void ttm_move_memcpy(struct ttm_buffer_object *bo, + u32 num_pages, + struct ttm_kmap_iter *dst_iter, + struct ttm_kmap_iter *src_iter); + +struct ttm_kmap_iter * +ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, + struct io_mapping *iomap, + struct sg_table *st, + resource_size_t start); #endif diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h index a0b4a49fa432..3c9dd65f5aaf 100644 --- a/include/drm/ttm/ttm_caching.h +++ b/include/drm/ttm/ttm_caching.h @@ -33,4 +33,6 @@ enum ttm_caching { ttm_cached }; +pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp); + #endif diff --git a/include/drm/ttm/ttm_kmap_iter.h b/include/drm/ttm/ttm_kmap_iter.h new file mode 100644 index 000000000000..8bb00fd39d6c --- /dev/null +++ b/include/drm/ttm/ttm_kmap_iter.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef __TTM_KMAP_ITER_H__ +#define __TTM_KMAP_ITER_H__ + +#include + +struct ttm_kmap_iter; +struct dma_buf_map; + +/** + * struct ttm_kmap_iter_ops - Ops structure for a struct + * ttm_kmap_iter. + * @maps_tt: Whether the iterator maps TT memory directly, as opposed + * mapping a TT through an aperture. Both these modes have + * struct ttm_resource_manager::use_tt set, but the latter typically + * returns is_iomem == true from ttm_mem_io_reserve. + */ +struct ttm_kmap_iter_ops { + /** + * kmap_local() - Map a PAGE_SIZE part of the resource using + * kmap_local semantics. + * @res_iter: Pointer to the struct ttm_kmap_iter representing + * the resource. + * @dmap: The struct dma_buf_map holding the virtual address after + * the operation. + * @i: The location within the resource to map. PAGE_SIZE granularity. + */ + void (*map_local)(struct ttm_kmap_iter *res_iter, + struct dma_buf_map *dmap, pgoff_t i); + /** + * unmap_local() - Unmap a PAGE_SIZE part of the resource previously + * mapped using kmap_local. + * @res_iter: Pointer to the struct ttm_kmap_iter representing + * the resource. 
+ * @dmap: The struct dma_buf_map holding the virtual address after + * the operation. + */ + void (*unmap_local)(struct ttm_kmap_iter *res_iter, + struct dma_buf_map *dmap); + bool maps_tt; +}; + +/** + * struct ttm_kmap_iter - Iterator for kmap_local type operations on a + * resource. + * @ops: Pointer to the operations struct. + * + * This struct is intended to be embedded in a resource-specific specialization + * implementing operations for the resource. + * + * Nothing stops us from extending the operations to vmap, vmap_pfn etc, + * replacing some or parts of the ttm_bo_util. cpu-map functionality. + */ +struct ttm_kmap_iter { + const struct ttm_kmap_iter_ops *ops; +}; + +#endif /* __TTM_KMAP_ITER_H__ */ diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 4abb95b9fd11..140b6b9a8bbe 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -27,9 +27,11 @@ #include #include +#include #include #include #include +#include #define TTM_MAX_BO_PRIORITY 4U @@ -38,6 +40,10 @@ struct ttm_resource_manager; struct ttm_resource; struct ttm_place; struct ttm_buffer_object; +struct dma_buf_map; +struct io_mapping; +struct sg_table; +struct scatterlist; struct ttm_resource_manager_func { /** @@ -167,6 +173,45 @@ struct ttm_resource { struct ttm_bus_placement bus; }; +/** + * struct ttm_kmap_iter_iomap - Specialization for a struct io_mapping + + * struct sg_table backed struct ttm_resource. + * @base: Embedded struct ttm_kmap_iter providing the usage interface. + * @iomap: struct io_mapping representing the underlying linear io_memory. + * @st: sg_table into @iomap, representing the memory of the struct ttm_resource. + * @start: Offset that needs to be subtracted from @st to make + * sg_dma_address(st->sgl) - @start == 0 for @iomap start. + * @cache: Scatterlist traversal cache for fast lookups. + * @cache.sg: Pointer to the currently cached scatterlist segment. + * @cache.i: First index of @sg. PAGE_SIZE granularity. + * @cache.end: Last index + 1 of @sg. PAGE_SIZE granularity. + * @cache.offs: First offset into @iomap of @sg. PAGE_SIZE granularity. 
+ */ +struct ttm_kmap_iter_iomap { + struct ttm_kmap_iter base; + struct io_mapping *iomap; + struct sg_table *st; + resource_size_t start; + struct { + struct scatterlist *sg; + pgoff_t i; + pgoff_t end; + pgoff_t offs; + } cache; +}; + +/** + * struct ttm_kmap_iter_linear_io - Iterator specialization for linear io + * @base: The base iterator + * @dmap: Points to the starting address of the region + * @needs_unmap: Whether we need to unmap on fini + */ +struct ttm_kmap_iter_linear_io { + struct ttm_kmap_iter base; + struct dma_buf_map dmap; + bool needs_unmap; +}; + /** * ttm_resource_manager_set_used * @@ -231,4 +276,20 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, void ttm_resource_manager_debug(struct ttm_resource_manager *man, struct drm_printer *p); +struct ttm_kmap_iter * +ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, + struct io_mapping *iomap, + struct sg_table *st, + resource_size_t start); + +struct ttm_kmap_iter_linear_io; + +struct ttm_kmap_iter * +ttm_kmap_iter_linear_io_init(struct ttm_kmap_iter_linear_io *iter_io, + struct ttm_device *bdev, + struct ttm_resource *mem); + +void ttm_kmap_iter_linear_io_fini(struct ttm_kmap_iter_linear_io *iter_io, + struct ttm_device *bdev, + struct ttm_resource *mem); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 134d09ef7766..3102059db726 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -29,6 +29,7 @@ #include #include +#include struct ttm_bo_device; struct ttm_tt; @@ -69,6 +70,18 @@ struct ttm_tt { enum ttm_caching caching; }; +/** + * struct ttm_kmap_iter_tt - Specialization of a mappig iterator for a tt. + * @base: Embedded struct ttm_kmap_iter providing the usage interface + * @tt: Cached struct ttm_tt. + * @prot: Cached page protection for mapping. + */ +struct ttm_kmap_iter_tt { + struct ttm_kmap_iter base; + struct ttm_tt *tt; + pgprot_t prot; +}; + static inline bool ttm_tt_is_populated(struct ttm_tt *tt) { return tt->page_flags & TTM_PAGE_FLAG_PRIV_POPULATED; @@ -159,6 +172,9 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm); void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages); +struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt, + struct ttm_tt *tt); + #if IS_ENABLED(CONFIG_AGP) #include -- cgit v1.2.3 From b7e32bef4ae5f9149276203564b7911fac466588 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 2 Jun 2021 10:38:11 +0200 Subject: drm: Add a prefetching memcpy_from_wc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading out of write-combining mapped memory is typically very slow since the CPU doesn't prefetch. However some archs have special instructions to do this. So add a best-effort memcpy_from_wc taking dma-buf-map pointer arguments that attempts to use a fast prefetching memcpy and otherwise falls back to ordinary memcopies, taking the iomem tagging into account. The code is largely copied from i915_memcpy_from_wc. 
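As a minimal usage sketch (not part of this patch; the function name example_readback_page and the vram_iomem/staging arguments are made up for illustration), a driver could use the new helper roughly like this:

#include <linux/dma-buf-map.h>
#include <drm/drm_cache.h>

/* Copy one page back from a WC-mapped aperture into system memory. */
static void example_readback_page(void __iomem *vram_iomem, void *staging)
{
	struct dma_buf_map src, dst;

	/* Tag the source as iomem so the fallback path uses memcpy_fromio(). */
	dma_buf_map_set_vaddr_iomem(&src, vram_iomem);
	/* The destination is ordinary kernel memory. */
	dma_buf_map_set_vaddr(&dst, staging);

	/* Uses the prefetching non-temporal loop on capable x86 CPUs and
	 * falls back to the plain memcpy variants everywhere else. */
	drm_memcpy_from_wc(&dst, &src, PAGE_SIZE);
}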
Cc: Daniel Vetter Cc: Christian König Suggested-by: Daniel Vetter Signed-off-by: Thomas Hellström Acked-by: Christian König Acked-by: Daniel Vetter Link: https://lore.kernel.org/r/20210602083818.241793-5-thomas.hellstrom@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20210602083818.241793-5-thomas.hellstrom@linux.intel.com --- Documentation/gpu/drm-mm.rst | 4 +- drivers/gpu/drm/drm_cache.c | 148 +++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/drm_drv.c | 2 + include/drm/drm_cache.h | 7 ++ 4 files changed, 159 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 21be6deadc12..d5a73fa2c9ef 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -469,8 +469,8 @@ DRM MM Range Allocator Function References .. kernel-doc:: drivers/gpu/drm/drm_mm.c :export: -DRM Cache Handling -================== +DRM Cache Handling and Fast WC memcpy() +======================================= .. kernel-doc:: drivers/gpu/drm/drm_cache.c :export: diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 79a50ef1250f..546599f19a93 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -28,6 +28,7 @@ * Authors: Thomas Hellström */ +#include #include #include #include @@ -35,6 +36,9 @@ #include +/* A small bounce buffer that fits on the stack. */ +#define MEMCPY_BOUNCE_SIZE 128 + #if defined(CONFIG_X86) #include @@ -209,3 +213,147 @@ bool drm_need_swiotlb(int dma_bits) return max_iomem > ((u64)1 << dma_bits); } EXPORT_SYMBOL(drm_need_swiotlb); + +static void memcpy_fallback(struct dma_buf_map *dst, + const struct dma_buf_map *src, + unsigned long len) +{ + if (!dst->is_iomem && !src->is_iomem) { + memcpy(dst->vaddr, src->vaddr, len); + } else if (!src->is_iomem) { + dma_buf_map_memcpy_to(dst, src->vaddr, len); + } else if (!dst->is_iomem) { + memcpy_fromio(dst->vaddr, src->vaddr_iomem, len); + } else { + /* + * Bounce size is not performance tuned, but using a + * bounce buffer like this is significantly faster than + * resorting to ioreadxx() + iowritexx(). + */ + char bounce[MEMCPY_BOUNCE_SIZE]; + void __iomem *_src = src->vaddr_iomem; + void __iomem *_dst = dst->vaddr_iomem; + + while (len >= MEMCPY_BOUNCE_SIZE) { + memcpy_fromio(bounce, _src, MEMCPY_BOUNCE_SIZE); + memcpy_toio(_dst, bounce, MEMCPY_BOUNCE_SIZE); + _src += MEMCPY_BOUNCE_SIZE; + _dst += MEMCPY_BOUNCE_SIZE; + len -= MEMCPY_BOUNCE_SIZE; + } + if (len) { + memcpy_fromio(bounce, _src, MEMCPY_BOUNCE_SIZE); + memcpy_toio(_dst, bounce, MEMCPY_BOUNCE_SIZE); + } + } +} + +#ifdef CONFIG_X86 + +static DEFINE_STATIC_KEY_FALSE(has_movntdqa); + +static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) +{ + kernel_fpu_begin(); + + while (len >= 4) { + asm("movntdqa (%0), %%xmm0\n" + "movntdqa 16(%0), %%xmm1\n" + "movntdqa 32(%0), %%xmm2\n" + "movntdqa 48(%0), %%xmm3\n" + "movaps %%xmm0, (%1)\n" + "movaps %%xmm1, 16(%1)\n" + "movaps %%xmm2, 32(%1)\n" + "movaps %%xmm3, 48(%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 64; + dst += 64; + len -= 4; + } + while (len--) { + asm("movntdqa (%0), %%xmm0\n" + "movaps %%xmm0, (%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 16; + dst += 16; + } + + kernel_fpu_end(); +} + +/* + * __drm_memcpy_from_wc copies @len bytes from @src to @dst using + * non-temporal instructions where available. Note that all arguments + * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple + * of 16. 
+ */ +static void __drm_memcpy_from_wc(void *dst, const void *src, unsigned long len) +{ + if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) + memcpy(dst, src, len); + else if (likely(len)) + __memcpy_ntdqa(dst, src, len >> 4); +} + +/** + * drm_memcpy_from_wc - Perform the fastest available memcpy from a source + * that may be WC. + * @dst: The destination pointer + * @src: The source pointer + * @len: The size of the area o transfer in bytes + * + * Tries an arch optimized memcpy for prefetching reading out of a WC region, + * and if no such beast is available, falls back to a normal memcpy. + */ +void drm_memcpy_from_wc(struct dma_buf_map *dst, + const struct dma_buf_map *src, + unsigned long len) +{ + if (WARN_ON(in_interrupt())) { + memcpy_fallback(dst, src, len); + return; + } + + if (static_branch_likely(&has_movntdqa)) { + __drm_memcpy_from_wc(dst->is_iomem ? + (void __force *)dst->vaddr_iomem : + dst->vaddr, + src->is_iomem ? + (void const __force *)src->vaddr_iomem : + src->vaddr, + len); + return; + } + + memcpy_fallback(dst, src, len); +} +EXPORT_SYMBOL(drm_memcpy_from_wc); + +/* + * drm_memcpy_init_early - One time initialization of the WC memcpy code + */ +void drm_memcpy_init_early(void) +{ + /* + * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions + * emulation. So don't enable movntdqa in hypervisor guest. + */ + if (static_cpu_has(X86_FEATURE_XMM4_1) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_enable(&has_movntdqa); +} +#else +void drm_memcpy_from_wc(struct dma_buf_map *dst, + const struct dma_buf_map *src, + unsigned long len) +{ + WARN_ON(in_interrupt()); + + memcpy_fallback(dst, src, len); +} +EXPORT_SYMBOL(drm_memcpy_from_wc); + +void drm_memcpy_init_early(void) +{ +} +#endif /* CONFIG_X86 */ diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 3d8d68a98b95..8804ec7d3215 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -1041,6 +1042,7 @@ static int __init drm_core_init(void) drm_connector_ida_init(); idr_init(&drm_minors_idr); + drm_memcpy_init_early(); ret = drm_sysfs_init(); if (ret < 0) { diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index e9ad4863d915..cc9de1632dd3 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,6 +35,8 @@ #include +struct dma_buf_map; + void drm_clflush_pages(struct page *pages[], unsigned long num_pages); void drm_clflush_sg(struct sg_table *st); void drm_clflush_virt_range(void *addr, unsigned long length); @@ -70,4 +72,9 @@ static inline bool drm_arch_can_wc_memory(void) #endif } +void drm_memcpy_init_early(void); + +void drm_memcpy_from_wc(struct dma_buf_map *dst, + const struct dma_buf_map *src, + unsigned long len); #endif -- cgit v1.2.3 From a3be8cd70fec2aa8913b59c9026031205c29df28 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 2 Jun 2021 10:38:13 +0200 Subject: drm/ttm: Document and optimize ttm_bo_pipeline_gutting() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the bo is idle when calling ttm_bo_pipeline_gutting(), we unnecessarily create a ghost object and push it out to delayed destroy. Fix this by adding a path for idle, and document the function. Also avoid having the bo end up in a bad state vulnerable to user-space triggered kernel BUGs if the call to ttm_tt_create() fails. Finally reuse ttm_bo_pipeline_gutting() in ttm_bo_evict(). 
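For reference, a condensed sketch of the idle fast path this change introduces (the helper name example_gut_idle_bo is made up and error handling is trimmed; the authoritative version is in the hunk below):

static int example_gut_idle_bo(struct ttm_buffer_object *bo)
{
	static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM };
	int ret;

	/* Non-blocking wait: only take this path when the bo is idle. */
	if (ttm_bo_wait(bo, false, true) == -EBUSY)
		return -EBUSY;

	if (!bo->ttm) {
		/* Create a ttm_tt that is cleared on first populate. */
		ret = ttm_tt_create(bo, true);
		if (ret)
			return ret;
	} else {
		/* Drop the pages now and zero them again if the bo is ever
		 * repopulated, so no stale data leaks to user-space. */
		ttm_tt_unpopulate(bo->bdev, bo->ttm);
		if (bo->type == ttm_bo_type_device)
			ttm_tt_mark_for_clear(bo->ttm);
	}

	/* Leave the bo in system placement without backing pages. */
	ttm_resource_free(bo, &bo->resource);
	return ttm_resource_alloc(bo, &sys_mem, &bo->resource);
}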
Cc: Christian König Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20210602083818.241793-7-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo.c | 20 +++++++-------- drivers/gpu/drm/ttm/ttm_bo_util.c | 54 +++++++++++++++++++++++++++++++++++---- include/drm/ttm/ttm_tt.h | 13 ++++++++++ 3 files changed, 72 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 7e7284da5630..c0ca28edd869 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -503,10 +503,15 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bdev->funcs->evict_flags(bo, &placement); if (!placement.num_placement && !placement.num_busy_placement) { - ttm_bo_wait(bo, false, false); + ret = ttm_bo_wait(bo, true, false); + if (ret) + return ret; - ttm_bo_cleanup_memtype_use(bo); - return ttm_tt_create(bo, false); + /* + * Since we've already synced, this frees backing store + * immediately. + */ + return ttm_bo_pipeline_gutting(bo); } ret = ttm_bo_mem_space(bo, &placement, &evict_mem, ctx); @@ -947,13 +952,8 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, /* * Remove the backing store if no placement is given. */ - if (!placement->num_placement && !placement->num_busy_placement) { - ret = ttm_bo_pipeline_gutting(bo); - if (ret) - return ret; - - return ttm_tt_create(bo, false); - } + if (!placement->num_placement && !placement->num_busy_placement) + return ttm_bo_pipeline_gutting(bo); /* * Check whether we need to move buffer. diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 3161adb5903c..1d25994411a0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -566,26 +566,70 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_move_accel_cleanup); +/** + * ttm_bo_pipeline_gutting - purge the contents of a bo + * @bo: The buffer object + * + * Purge the contents of a bo, async if the bo is not idle. + * After a successful call, the bo is left unpopulated in + * system placement. The function may wait uninterruptible + * for idle on OOM. + * + * Return: 0 if successful, negative error code on failure. + */ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo) { static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM }; struct ttm_buffer_object *ghost; + struct ttm_tt *ttm; int ret; - ret = ttm_buffer_object_transfer(bo, &ghost); + /* If already idle, no need for ghost object dance. */ + ret = ttm_bo_wait(bo, false, true); + if (ret != -EBUSY) { + if (!bo->ttm) { + /* See comment below about clearing. */ + ret = ttm_tt_create(bo, true); + if (ret) + return ret; + } else { + ttm_tt_unpopulate(bo->bdev, bo->ttm); + if (bo->type == ttm_bo_type_device) + ttm_tt_mark_for_clear(bo->ttm); + } + ttm_resource_free(bo, &bo->resource); + return ttm_resource_alloc(bo, &sys_mem, &bo->resource); + } + + /* + * We need an unpopulated ttm_tt after giving our current one, + * if any, to the ghost object. And we can't afford to fail + * creating one *after* the operation. If the bo subsequently gets + * resurrected, make sure it's cleared (if ttm_bo_type_device) + * to avoid leaking sensitive information to user-space. 
+ */ + + ttm = bo->ttm; + bo->ttm = NULL; + ret = ttm_tt_create(bo, true); + swap(bo->ttm, ttm); if (ret) return ret; + ret = ttm_buffer_object_transfer(bo, &ghost); + if (ret) { + ttm_tt_destroy(bo->bdev, ttm); + return ret; + } + ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv); /* Last resort, wait for the BO to be idle when we are OOM */ if (ret) ttm_bo_wait(bo, false, false); - ret = ttm_resource_alloc(bo, &sys_mem, &bo->resource); - bo->ttm = NULL; - dma_resv_unlock(&ghost->base._resv); ttm_bo_put(ghost); + bo->ttm = ttm; - return ret; + return ttm_resource_alloc(bo, &sys_mem, &bo->resource); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 3102059db726..818680c6a8ed 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -170,6 +170,19 @@ int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_oper */ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm); +/** + * ttm_tt_mark_for_clear - Mark pages for clearing on populate. + * + * @ttm: Pointer to the ttm_tt structure + * + * Marks pages for clearing so that the next time the page vector is + * populated, the pages will be cleared. + */ +static inline void ttm_tt_mark_for_clear(struct ttm_tt *ttm) +{ + ttm->page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; +} + void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages); struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt, -- cgit v1.2.3