From a92e145059cb883155a24a2d3ac33296d33d9df7 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 23 Aug 2017 15:17:47 -0400 Subject: drm/ttm: Add DMA map/unmap tracepoint (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also exports two functions that vendor drivers can call to trace DMA mappings. This is meant to help translate IOMMU mappings of bus addresses back to physical pages. Used by the umr amdgpu debugger for instance. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher (v2): Use dev_name() to get PCI path instead. (v3): Use correct types for dma/phys addresses --- include/drm/ttm/ttm_debug.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/drm/ttm/ttm_debug.h (limited to 'include') diff --git a/include/drm/ttm/ttm_debug.h b/include/drm/ttm/ttm_debug.h new file mode 100644 index 000000000000..b5e460fa5086 --- /dev/null +++ b/include/drm/ttm/ttm_debug.h @@ -0,0 +1,31 @@ +/************************************************************************** + * + * Copyright (c) 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Tom St Denis + */ +extern void ttm_trace_dma_map(struct device *dev, struct ttm_dma_tt *tt); +extern void ttm_trace_dma_unmap(struct device *dev, struct ttm_dma_tt *tt); -- cgit v1.2.3 From a4dec819c8bba6365eb893a4ca88db4dd1210110 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 18 Aug 2017 10:04:57 -0400 Subject: drm/ttm: Add helper functions to populate/map in one call (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions replace a section of common code found in radeon/amdgpu drivers (and possibly others) as part of the ttm_tt_*populate() callbacks. 
v2: squash in fix for sw iommu from Tom Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 41 ++++++++++++++++++++++++++++++++++++ include/drm/ttm/ttm_page_alloc.h | 21 ++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 871599826773..6a660d196d87 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -920,6 +920,47 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm) } EXPORT_SYMBOL(ttm_pool_unpopulate); +int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) +{ + unsigned i; + int r; + + r = ttm_pool_populate(&tt->ttm); + if (r) + return r; + + for (i = 0; i < tt->ttm.num_pages; i++) { + tt->dma_address[i] = dma_map_page(dev, tt->ttm.pages[i], + 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, tt->dma_address[i])) { + while (i--) { + dma_unmap_page(dev, tt->dma_address[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + tt->dma_address[i] = 0; + } + ttm_pool_unpopulate(&tt->ttm); + return -EFAULT; + } + } + return 0; +} +EXPORT_SYMBOL(ttm_populate_and_map_pages); + +void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) +{ + unsigned i; + + for (i = 0; i < tt->ttm.num_pages; i++) { + if (tt->dma_address[i]) { + dma_unmap_page(dev, tt->dma_address[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + } + ttm_pool_unpopulate(&tt->ttm); +} +EXPORT_SYMBOL(ttm_unmap_and_unpopulate_pages); + int ttm_page_alloc_debugfs(struct seq_file *m, void *data) { struct ttm_page_pool *p; diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 49a828425fa2..bf21166f2b97 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -83,6 +83,17 @@ extern int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); extern int ttm_dma_populate(struct ttm_dma_tt 
*ttm_dma, struct device *dev); extern void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); + +/** + * Populates and DMA maps pages to fulfill a ttm_dma_populate() request + */ +int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); + +/** + * Unpopulates and DMA unmaps pages as part of a + * ttm_dma_unpopulate() request */ +void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); + #else static inline int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) @@ -105,6 +116,16 @@ static inline void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { } + +static inline int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) +{ + return -ENOMEM; +} + +static inline void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) +{ +} + #endif #endif -- cgit v1.2.3 From 96bec198352799794b0f8937620e811ef8b9fa22 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 24 Aug 2017 06:46:39 -0400 Subject: drm/ttm: Remove needless 'extern' on functions in header. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minor tidy up. 
Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_page_alloc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index bf21166f2b97..38a2b4770c35 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -47,7 +47,7 @@ void ttm_page_alloc_fini(void); * * Add backing pages to all of @ttm */ -extern int ttm_pool_populate(struct ttm_tt *ttm); +int ttm_pool_populate(struct ttm_tt *ttm); /** * ttm_pool_unpopulate: @@ -56,12 +56,12 @@ extern int ttm_pool_populate(struct ttm_tt *ttm); * * Free all pages of @ttm */ -extern void ttm_pool_unpopulate(struct ttm_tt *ttm); +void ttm_pool_unpopulate(struct ttm_tt *ttm); /** * Output the state of pools to debugfs file */ -extern int ttm_page_alloc_debugfs(struct seq_file *m, void *data); +int ttm_page_alloc_debugfs(struct seq_file *m, void *data); #if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) @@ -78,10 +78,10 @@ void ttm_dma_page_alloc_fini(void); /** * Output the state of pools to debugfs file */ -extern int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); +int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); -extern int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); -extern void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); +int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); +void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); /** -- cgit v1.2.3 From e1eb899b45781b9bb77e6d7772d6e67bb0bc1a18 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 25 Aug 2017 09:14:43 +0200 Subject: drm/amdgpu: add IOCTL interface for per VM BOs v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the IOCTL interface so that applications can allocate per VM BOs. 
Still WIP since not all corner cases are tested yet, but this reduces average CS overhead for 10K BOs from 21ms down to 48us. v2: add some extra checks, remove the WIP tag v3: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 63 ++++++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +- include/uapi/drm/amdgpu_drm.h | 2 + 5 files changed, 55 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 103635ab784c..5809f55e0d9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -455,9 +455,10 @@ struct amdgpu_sa_bo { */ void amdgpu_gem_force_release(struct amdgpu_device *adev); int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, - int alignment, u32 initial_domain, - u64 flags, bool kernel, - struct drm_gem_object **obj); + int alignment, u32 initial_domain, + u64 flags, bool kernel, + struct reservation_object *resv, + struct drm_gem_object **obj); int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 9afa9c097e1f..b6cb276f0a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -149,7 +149,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, - true, &gobj); + true, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 
e32a2b55b54f..f1e61b3df640 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) } int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, - int alignment, u32 initial_domain, - u64 flags, bool kernel, - struct drm_gem_object **obj) + int alignment, u32 initial_domain, + u64 flags, bool kernel, + struct reservation_object *resv, + struct drm_gem_object **obj) { - struct amdgpu_bo *robj; + struct amdgpu_bo *bo; int r; *obj = NULL; @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, retry: r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, NULL, 0, &robj); + flags, NULL, resv, 0, &bo); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { @@ -71,7 +72,7 @@ retry: } return r; } - *obj = &robj->gem_base; + *obj = &bo->gem_base; return 0; } @@ -119,6 +120,10 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, if (mm && mm != current->mm) return -EPERM; + if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && + abo->tbo.resv != vm->root.base.bo->tbo.resv) + return -EPERM; + r = amdgpu_bo_reserve(abo, false); if (r) return r; @@ -142,13 +147,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry vm_pd; - struct list_head list; + struct list_head list, duplicates; struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct amdgpu_bo_va *bo_va; int r; INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&duplicates); tv.bo = &bo->tbo; tv.shared = true; @@ -156,7 +162,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); if (r) { dev_err(adev->dev, "leaking bo va because " "we fail to reserve 
bo (%d)\n", r); @@ -191,9 +197,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; union drm_amdgpu_gem_create *args = data; uint64_t flags = args->in.domain_flags; uint64_t size = args->in.bo_size; + struct reservation_object *resv = NULL; struct drm_gem_object *gobj; uint32_t handle; int r; @@ -202,7 +211,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_VRAM_CLEARED)) + AMDGPU_GEM_CREATE_VRAM_CLEARED | + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) return -EINVAL; /* reject invalid gem domains */ @@ -229,9 +239,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, } size = roundup(size, PAGE_SIZE); + if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { + r = amdgpu_bo_reserve(vm->root.base.bo, false); + if (r) + return r; + + resv = vm->root.base.bo->tbo.resv; + } + r = amdgpu_gem_object_create(adev, size, args->in.alignment, (u32)(0xffffffff & args->in.domains), - flags, false, &gobj); + flags, false, resv, &gobj); + if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { + if (!r) { + struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); + + abo->parent = amdgpu_bo_ref(vm->root.base.bo); + } + amdgpu_bo_unreserve(vm->root.base.bo); + } if (r) return r; @@ -273,9 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, } /* create a gem object to contain this object in */ - r = amdgpu_gem_object_create(adev, args->size, 0, - AMDGPU_GEM_DOMAIN_CPU, 0, - 0, &gobj); + r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, + 0, 0, NULL, &gobj); if (r) return r; @@ -527,7 +552,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_bo_list_entry vm_pd; struct ttm_validate_buffer tv; 
struct ww_acquire_ctx ticket; - struct list_head list; + struct list_head list, duplicates; uint64_t va_flags; int r = 0; @@ -563,6 +588,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&duplicates); if ((args->operation != AMDGPU_VA_OP_CLEAR) && !(args->flags & AMDGPU_VM_PAGE_PRT)) { gobj = drm_gem_object_lookup(filp, args->handle); @@ -579,7 +605,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); if (r) goto error_unref; @@ -645,6 +671,7 @@ error_unref: int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_gem_op *args = data; struct drm_gem_object *gobj; struct amdgpu_bo *robj; @@ -692,6 +719,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; + if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) + amdgpu_vm_bo_invalidate(adev, robj, true); + amdgpu_bo_unreserve(robj); break; default: @@ -721,8 +751,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - ttm_bo_type_device, - &gobj); + false, NULL, &gobj); if (r) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 5b3f92891f89..7e0826469b5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || + bo->flags & 
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); return drm_gem_prime_export(dev, gobj, flags); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 7b8fa11c2285..e055776f2f4c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -87,6 +87,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_SHADOW (1 << 4) /* Flag that allocating the BO should use linear VRAM */ #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3