summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2021-04-15 17:43:32 -0400
committerAlex Deucher <alexander.deucher@amd.com>2021-04-20 21:48:00 -0400
commitc46ebb6a6d9d28fce66595b56db070b0cc4fab71 (patch)
tree1f4261847073083b15dd04869e2e989aeb43c182
parent814ab9930cfd709768439799eae3c7ef0a658b54 (diff)
downloadlinux-c46ebb6a6d9d28fce66595b56db070b0cc4fab71.tar.bz2
drm/amdkfd: set memory limit to avoid OOM with HMM enabled
HMM migration alloc sizeof(struct page) on system memory for each VRAM page, it is 1GB system memory reserved for 64GB VRAM. To avoid application OOM, increase system memory used size based on VRAM size of all GPUs, then application alloc memory will fail if system memory usage reach the limit. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Oak Zeng <Oak.Zeng@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c8
3 files changed, 14 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 670e9f8d58b4..b3e5ecec316c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -275,6 +275,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
static inline
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 618614fe3eb4..d03088d1eb60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -108,6 +108,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+ kfd_mem_limit.system_mem_used += size;
+}
+
/* Estimate page table size needed to represent a given memory size
*
* With 4KB pages, we need one 8 byte PTE for each 4KB of memory
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b32d8fdfe8e9..d8cec5ebe1d4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -57,6 +57,9 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
.migrate_to_ram = svm_migrate_to_ram,
};
+/* Each VRAM page uses sizeof(struct page) on system memory */
+#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
+
int svm_migrate_init(struct amdgpu_device *adev)
{
struct kfd_dev *kfddev = adev->kfd.dev;
@@ -93,6 +96,11 @@ int svm_migrate_init(struct amdgpu_device *adev)
return PTR_ERR(r);
}
+ pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
+ SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
+
+ amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+
pr_info("HMM registered %ldMB device memory\n", size >> 20);
return 0;