Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 668
1 file changed, 464 insertions(+), 204 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3dc5ab2764ff..2170db83e41d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -32,12 +32,19 @@ #include "amdgpu_dma_buf.h" #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" +#include "kfd_smi_events.h" /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +/* + * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * BO chunk + */ +#define VRAM_AVAILABLITY_ALIGN (1 << 21) + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -108,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. This should be @@ -131,33 +129,32 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size) * * Return: returns -ENOMEM in case of error, ZERO otherwise */ -static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); - size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; - acc_size = amdgpu_amdkfd_acc_size(size); - + system_mem_needed = 0; + ttm_mem_needed = 0; vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size + size; + system_mem_needed = size; + ttm_mem_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; + /* + * Conservatively round up the allocation requirement to 2 MB + * to avoid fragmentation caused by 4K allocations in the tail + * 2M BO chunk. 
+ */ vram_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - } else { + system_mem_needed = size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); return -ENOMEM; } @@ -172,8 +169,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || - (adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - reserved_for_pt)) { + (adev && adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - + atomic64_read(&adev->vram_pin_size) - + reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -181,7 +180,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ - adev->kfd.vram_used += vram_needed; + WARN_ONCE(vram_needed && !adev, + "adev reference can't be null when vram is used"); + if (adev) { + adev->kfd.vram_used += vram_needed; + adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); + } kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -190,36 +194,30 @@ release: return ret; } -static void unreserve_mem_limit(struct amdgpu_device *adev, +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { - size_t acc_size; - - acc_size = amdgpu_amdkfd_acc_size(size); - spin_lock(&kfd_mem_limit.mem_limit_lock); if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= (acc_size + size); + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - adev->kfd.vram_used -= size; + WARN_ONCE(!adev, + "adev reference can't be null when alloc mem flags vram is set"); + if (adev) { + adev->kfd.vram_used -= size; + adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); + } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + kfd_mem_limit.system_mem_used -= size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } - - WARN_ONCE(adev->kfd.vram_used < 0, + WARN_ONCE(adev && adev->kfd.vram_used < 0, "KFD VRAM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, "KFD TTM memory accounting unbalanced"); @@ -236,11 +234,47 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) u32 alloc_flags = bo->kfd_bo->alloc_flags; u64 size = amdgpu_bo_size(bo); - unreserve_mem_limit(adev, size, alloc_flags); + 
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); kfree(bo->kfd_bo); } +/** + * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information + * about USERPTR or DOOREBELL or MMIO BO. + * @adev: Device for which dmamap BO is being created + * @mem: BO of peer device that is being DMA mapped. Provides parameters + * in building the dmamap BO + * @bo_out: Output parameter updated with handle of dmamap BO + */ +static int +create_dmamap_sg_bo(struct amdgpu_device *adev, + struct kgd_mem *mem, struct amdgpu_bo **bo_out) +{ + struct drm_gem_object *gem_obj; + int ret, align; + + ret = amdgpu_bo_reserve(mem->bo, false); + if (ret) + return ret; + + align = 1; + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, + ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); + + amdgpu_bo_unreserve(mem->bo); + + if (ret) { + pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret); + return -EINVAL; + } + + *bo_out = gem_to_amdgpu_bo(gem_obj); + (*bo_out)->parent = amdgpu_bo_ref(mem->bo); + return ret; +} + /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * @@ -350,22 +384,8 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd); - if (ret) { - pr_err("failed to validate PD\n"); - return ret; - } - vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo); - if (vm->use_cpu_for_update) { - ret = amdgpu_bo_kmap(pd, NULL); - if (ret) { - pr_err("failed to kmap PD, ret=%d\n", ret); - return ret; - } - } - return 0; } @@ -399,45 +419,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? 
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); @@ -446,6 +463,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) return pte_flags; } +/** + * create_sg_table() - Create an sg_table for a contiguous DMA addr range + * @addr: The starting address to point to + * @size: Size of memory area in bytes being pointed to + * + * Allocates an instance of sg_table and initializes it to point to memory + * area specified by input parameters. The address used to build is assumed + * to be DMA mapped, if needed. + * + * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table + * because they are physically contiguous. + * + * Return: Initialized instance of SG Table or NULL + */ +static struct sg_table *create_sg_table(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg_dma_address(sg->sgl) = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int kfd_mem_dmamap_userptr(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -510,6 +559,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } +/** + * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * An access request from the device that owns DOORBELL does not require DMA mapping. + * This is because the request doesn't go through PCIe root complex i.e. it instead + * loops back. The need to DMA map arises only when accessing peer device's DOORBELL + * + * In contrast, all access requests for MMIO need to be DMA mapped without regard to + * device ownership. This is because access requests for MMIO go through PCIe root + * complex. + * + * This is accomplished in two steps: + * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used + * in updating requesting device's page table + * - Signal TTM to mark memory pointed to by requesting device's BO as GPU + * accessible. 
This allows an update of requesting device's page table + * with entries associated with DOOREBELL or MMIO memory + * + * This method is invoked in the following contexts: + * - Mapping of DOORBELL or MMIO BO of same or peer device + * - Validating an evicted DOOREBELL or MMIO BO on device seeking access + * + * Return: ZERO if successful, NON-ZERO otherwise + */ +static int +kfd_mem_dmamap_sg_bo(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = bo->tbo.ttm; + enum dma_data_direction dir; + dma_addr_t dma_addr; + bool mmio; + int ret; + + /* Expect SG Table of dmapmap BO to be NULL */ + mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP); + if (unlikely(ttm->sg)) { + pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio); + return -EINVAL; + } + + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + dma_addr = mem->bo->tbo.sg->sgl->dma_address; + pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length); + pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr); + dma_addr = dma_map_resource(adev->dev, dma_addr, + mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); + ret = dma_mapping_error(adev->dev, dma_addr); + if (unlikely(ret)) + return ret; + pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr); + + ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length); + if (unlikely(!ttm->sg)) { + ret = -ENOMEM; + goto unmap_sg; + } + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (unlikely(ret)) + goto free_sg; + + return ret; + +free_sg: + sg_free_table(ttm->sg); + kfree(ttm->sg); + ttm->sg = NULL; +unmap_sg: + dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length, + dir, DMA_ATTR_SKIP_CPU_SYNC); + return ret; +} + static int kfd_mem_dmamap_attachment(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -521,6 +651,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem, return kfd_mem_dmamap_userptr(mem, attachment); case KFD_MEM_ATT_DMABUF: return kfd_mem_dmamap_dmabuf(attachment); + case KFD_MEM_ATT_SG: + return kfd_mem_dmamap_sg_bo(mem, attachment); default: WARN_ON_ONCE(1); } @@ -561,6 +693,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } +/** + * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * The method performs following steps: + * - Signal TTM to mark memory pointed to by BO as GPU inaccessible + * - Free SG Table that is used to encapsulate DMA mapped memory of + * peer device's DOORBELL or MMIO memory + * + * This method is invoked in the following contexts: + * UNMapping of DOORBELL or MMIO BO on a device having access to its memory + * Eviction of DOOREBELL or MMIO BO on device having access to its memory + * + * Return: void + */ +static void +kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = 
bo->tbo.ttm; + enum dma_data_direction dir; + + if (unlikely(!ttm->sg)) { + pr_err("SG Table of BO is UNEXPECTEDLY NULL"); + return; + } + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address, + ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(ttm->sg); + kfree(ttm->sg); + ttm->sg = NULL; + bo->tbo.sg = NULL; +} + static void kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -574,39 +750,15 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, case KFD_MEM_ATT_DMABUF: kfd_mem_dmaunmap_dmabuf(attachment); break; + case KFD_MEM_ATT_SG: + kfd_mem_dmaunmap_sg_bo(mem, attachment); + break; default: WARN_ON_ONCE(1); } } static int -kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem, - struct amdgpu_bo **bo) -{ - unsigned long bo_size = mem->bo->tbo.base.size; - struct drm_gem_object *gobj; - int ret; - - ret = amdgpu_bo_reserve(mem->bo, false); - if (ret) - return ret; - - ret = amdgpu_gem_object_create(adev, bo_size, 1, - AMDGPU_GEM_DOMAIN_CPU, - AMDGPU_GEM_CREATE_PREEMPTIBLE, - ttm_bo_type_sg, mem->bo->tbo.base.resv, - &gobj); - amdgpu_bo_unreserve(mem->bo); - if (ret) - return ret; - - *bo = gem_to_amdgpu_bo(gobj); - (*bo)->parent = amdgpu_bo_ref(mem->bo); - - return 0; -} - -static int kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo) { @@ -630,7 +782,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, *bo = gem_to_amdgpu_bo(gobj); (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE; - (*bo)->parent = amdgpu_bo_ref(mem->bo); return 0; } @@ -656,6 +807,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + bool same_hive = false; int i, ret; if (!va) { @@ -663,6 +815,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, return -EINVAL; } + /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices + * + * The access path of MMIO and DOORBELL BOs of is always over PCIe. + * In contrast the access path of VRAM BOs depens upon the type of + * link that connects the peer device. Access over PCIe is allowed + * if peer device has large BAR. 
In contrast, access over xGMI is + * allowed for both small and large BAR configurations of peer device + */ + if ((adev != bo_adev) && + ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { + if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) + same_hive = amdgpu_xgmi_same_hive(adev, bo_adev); + if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev)) + return -EINVAL; + } + for (i = 0; i <= is_aql; i++) { attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL); if (unlikely(!attachment[i])) { @@ -673,9 +843,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, va + bo_size, vm); - if (adev == bo_adev || - (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || - (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) { + if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the * local hive, or userptr mapping IOMMU direct map mode * share the original BO @@ -691,26 +861,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { /* Create an SG BO to DMA-map userptrs on other GPUs */ attachment[i]->type = KFD_MEM_ATT_USERPTR; - ret = kfd_mem_attach_userptr(adev, mem, &bo[i]); + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); if (ret) goto unwind; - } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT && - mem->bo->tbo.type != ttm_bo_type_sg) { - /* GTT BOs use DMA-mapping ability of dynamic-attach - * DMA bufs. TODO: The same should work for VRAM on - * large-BAR GPUs. 
- */ + /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */ + } else if (mem->bo->tbo.type == ttm_bo_type_sg) { + WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL || + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP), + "Handing invalid SG BO in ATTACH request"); + attachment[i]->type = KFD_MEM_ATT_SG; + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); + if (ret) + goto unwind; + /* Enable acces to GTT and VRAM BOs of peer devices */ + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT || + mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { attachment[i]->type = KFD_MEM_ATT_DMABUF; ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); if (ret) goto unwind; + pr_debug("Employ DMABUF mechanism to enable peer GPU access\n"); } else { - /* FIXME: Need to DMA-map other BO types: - * large-BAR VRAM, doorbells, MMIO remap - */ - attachment[i]->type = KFD_MEM_ATT_SHARED; - bo[i] = mem->bo; - drm_gem_object_get(&bo[i]->tbo.base); + WARN_ONCE(true, "Handling invalid ATTACH request"); + ret = -EINVAL; + goto unwind; } /* Add BO to VM internal data structures */ @@ -1058,8 +1232,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync, - bool *table_freed) + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1070,7 +1243,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); + ret = amdgpu_vm_bo_update(adev, bo_va, false); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1082,8 +1255,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte, - bool *table_freed) + bool no_update_pte) { int ret; @@ -1100,7 +1272,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync, table_freed); + ret = update_gpuvm_pte(mem, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1113,24 +1285,6 @@ update_gpuvm_pte_failed: return ret; } -static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) -{ - struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); - - if (!sg) - return NULL; - if (sg_alloc_table(sg, 1, GFP_KERNEL)) { - kfree(sg); - return NULL; - } - sg->sgl->dma_address = addr; - sg->sgl->length = size; -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->sgl->dma_length = size; -#endif - return sg; -} - static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -1366,16 +1520,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - /* Update process info */ mutex_lock(&process_info->lock); process_info->n_vms--; @@ -1459,6 +1607,21 @@ out_unlock: return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available; + spin_lock(&kfd_mem_limit.mem_limit_lock); + available = adev->gmc.real_vram_size + - 
adev->kfd.vram_used_aligned + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); +} + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, @@ -1485,26 +1648,26 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; - } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { + } else { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE; - if (!offset || !*offset) - return -EINVAL; - user_addr = untagged_addr(*offset); - } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - bo_type = ttm_bo_type_sg; - alloc_flags = 0; - if (size > UINT_MAX) + + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { + if (!offset || !*offset) + return -EINVAL; + user_addr = untagged_addr(*offset); + } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { + bo_type = ttm_bo_type_sg; + if (size > UINT_MAX) + return -EINVAL; + sg = create_sg_table(*offset, size); + if (!sg) + return -ENOMEM; + } else { return -EINVAL; - sg = create_doorbell_sg(*offset, size); - if (!sg) - return -ENOMEM; - } else { - return -EINVAL; + } } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); @@ -1565,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - pr_debug("creating userptr BO for user_addr = %llu\n", user_addr); + pr_debug("creating userptr BO for user_addr = %llx\n", user_addr); ret = init_user_pages(*mem, user_addr, criu_resume); if (ret) goto allocate_init_user_pages_failed; @@ -1593,7 +1756,7 @@ err_node_allow: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_mem_limit(adev, size, flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags); err_reserve_limit: mutex_destroy(&(*mem)->lock); if (gobj) @@ -1614,6 +1777,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.base.size; + bool use_release_notifier = (mem->bo->kfd_bo == mem); struct kfd_mem_attachment *entry, *tmp; struct bo_vm_reservation_context ctx; struct ttm_validate_buffer *bo_list_entry; @@ -1623,7 +1787,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(&mem->lock); - /* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */ + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { @@ -1705,12 +1869,19 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( */ drm_gem_object_put(&mem->bo->tbo.base); + /* + * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(), + * explicitly free it here. 
+ */ + if (!use_release_notifier) + kfree(mem); + return ret; } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, - void *drm_priv, bool *table_freed) + void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; @@ -1797,7 +1968,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr, table_freed); + is_invalid_userptr); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -1909,8 +2080,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory( return ret; } -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem, void **kptr, uint64_t *size) +/** + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count + * @adev: Device to which allocated BO belongs + * @bo: Buffer object to be mapped + * + * Before return, bo reference count is incremented. To release the reference and unpin/ + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. + */ +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto err_pin_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + pr_err("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, bo->kfd_bo->process_info->eviction_fence); + + amdgpu_bo_unreserve(bo); + + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unpin(bo); +err_pin_bo_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access + * + * @mem: Buffer object to be mapped for CPU access + * @kptr[out]: pointer in kernel CPU address space + * @size[out]: size of the buffer + * + * Pins the BO and maps it for kernel CPU access. The eviction fence is removed + * from the BO, since pinned BOs cannot be evicted. The bo must remain on the + * validate_list, so the GPU mapping can be restored after a page table was + * evicted. + * + * Return: 0 on success, error code on failure + */ +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, + void **kptr, uint64_t *size) { int ret; struct amdgpu_bo *bo = mem->bo; @@ -1920,9 +2152,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, return -EINVAL; } - /* delete kgd_mem from kfd_bo_list to avoid re-validating - * this BO in BO's restoring after eviction. - */ mutex_lock(&mem->process_info->lock); ret = amdgpu_bo_reserve(bo, true); @@ -1945,7 +2174,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, amdgpu_amdkfd_remove_eviction_fence( bo, mem->process_info->eviction_fence); - list_del_init(&mem->validate_list.head); if (size) *size = amdgpu_bo_size(bo); @@ -1965,8 +2193,15 @@ bo_reserve_failed: return ret; } -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem) +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access + * + * @mem: Buffer object to be unmapped for CPU access + * + * Removes the kernel CPU mapping and unpins the BO. 
It does not restore the + * eviction fence, so this function should only be used for cleanup before the + * BO is destroyed. + */ +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; @@ -2078,7 +2313,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, @@ -2265,7 +2500,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2352,13 +2587,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) unlock_out: mutex_unlock(&process_info->lock); - mmput(mm); - put_task_struct(usertask); /* If validation failed, reschedule another attempt */ - if (evicted_bos) + if (evicted_bos) { schedule_delayed_work(&process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); + } + mmput(mm); + put_task_struct(usertask); } /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given @@ -2476,7 +2714,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; @@ -2514,12 +2752,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Attach new eviction fence to all BOs */ + /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) + validate_list.head) { + if (mem->bo->tbo.pin_count) + continue; + amdgpu_bo_fence(mem->bo, &process_info->eviction_fence->base, true); - + } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { @@ -2657,3 +2898,22 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem * } return false; } + +#if defined(CONFIG_DEBUG_FS) + +int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data) +{ + + spin_lock(&kfd_mem_limit.mem_limit_lock); + seq_printf(m, "System mem used %lldM out of %lluM\n", + (kfd_mem_limit.system_mem_used >> 20), + (kfd_mem_limit.max_system_mem_limit >> 20)); + seq_printf(m, "TTM mem used %lldM out of %lluM\n", + (kfd_mem_limit.ttm_mem_used >> 20), + (kfd_mem_limit.max_ttm_mem_limit >> 20)); + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return 0; +} + +#endif |