diff options
author | Daniel Phillips <Daniel.Phillips@amd.com> | 2022-05-30 11:21:22 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2022-06-14 21:38:40 -0400 |
commit | 9731dd4cadc53251ef80b3655c8d841fed52fa3d (patch) | |
tree | d2385079c1d0da89501f0e8cc3aacb33244d9479 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |
parent | 1a65327a84db5b9081a51ccb1c562083f59bfcec (diff) | |
download | linux-9731dd4cadc53251ef80b3655c8d841fed52fa3d.tar.bz2 |
drm/amdkfd: Add available memory ioctl
Add a new KFD ioctl to return the largest possible memory size that
can be allocated as a buffer object using
kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same
accept/reject criteria as that function so that allocating a new
buffer object of the size returned by this new ioctl is guaranteed to
succeed, barring races with other allocating tasks.
This IOCTL will be used by libhsakmt:
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
Signed-off-by: Daniel Phillips <Daniel.Phillips@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 |
1 files changed, 34 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f386b0d256d2..8805bd1eed37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -38,6 +38,12 @@
  */
 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 
+/*
+ * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_ALLOCATION_ALIGN (1 << 21)
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
@@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
  * compromise that should work in most cases without reserving too
  * much memory for page tables unnecessarily (factor 16K, >> 14).
  */
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
 
 static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 {
@@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		vram_needed = size;
+
+		/*
+		 * Conservatively round up the allocation requirement to 2 MB
+		 * to avoid fragmentation caused by 4K allocations in the tail
+		 * 2M BO chunk.
+		 */
+		vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
@@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 	     kfd_mem_limit.max_ttm_mem_limit) ||
 	    (adev->kfd.vram_used + vram_needed >
-	     adev->gmc.real_vram_size - reserved_for_pt)) {
+	     adev->gmc.real_vram_size -
+	     atomic64_read(&adev->vram_pin_size) -
+	     reserved_for_pt)) {
 		ret = -ENOMEM;
 		goto release;
 	}
@@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		adev->kfd.vram_used -= size;
+		adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -1633,6 +1647,22 @@ out_unlock:
 	return ret;
 }
 
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+{
+	uint64_t reserved_for_pt =
+		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	size_t available;
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	available = adev->gmc.real_vram_size
+		- adev->kfd.vram_used
+		- atomic64_read(&adev->vram_pin_size)
+		- reserved_for_pt;
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+	return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+}
+
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,