Diffstat (limited to 'drivers/misc/habanalabs/common/memory.c')
-rw-r--r--  drivers/misc/habanalabs/common/memory.c  334
1 file changed, 238 insertions, 96 deletions
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index bfe223abf142..cbe9da4e0211 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -11,7 +11,6 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/genalloc.h>
 
 #define HL_MMU_DEBUG 0
 
@@ -46,7 +45,7 @@
 * @ret_handle : result handle
 *
 * This function does the following:
- * - Allocate the requested size rounded up to 2MB pages
+ * - Allocate the requested size rounded up to 'dram_page_size' pages
 * - Return unique handle
 */
 static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
@@ -81,6 +80,16 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
                num_pgs, total_size);
            return -ENOMEM;
        }
+
+        if (hdev->memory_scrub) {
+            rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr,
+                    total_size);
+            if (rc) {
+                dev_err(hdev->dev,
+                    "Failed to scrub contiguous device memory\n");
+                goto pages_pack_err;
+            }
+        }
    }
 
    phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
@@ -118,6 +127,17 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
                goto page_err;
            }
+
+            if (hdev->memory_scrub) {
+                rc = hdev->asic_funcs->scrub_device_mem(hdev,
+                        phys_pg_pack->pages[i],
+                        page_size);
+                if (rc) {
+                    dev_err(hdev->dev,
+                        "Failed to scrub device memory\n");
+                    goto page_err;
+                }
+            }
+
            num_curr_pgs++;
        }
    }
@@ -601,6 +621,87 @@ out:
 }
 
 /*
+ * hl_reserve_va_block() - reserve a virtual block of a given size.
+ * @hdev: pointer to the habanalabs device structure.
+ * @ctx: current context
+ * @type: virtual addresses range type.
+ * @size: requested block size.
+ * @alignment: required alignment in bytes of the virtual block start address,
+ *             0 means no alignment.
+ *
+ * This function does the following:
+ * - Iterate on the virtual block list to find a suitable virtual block for the
+ *   given size and alignment.
+ * - Reserve the requested block and update the list.
+ * - Return the start address of the virtual block.
+ */
+u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
+        enum hl_va_range_type type, u32 size, u32 alignment)
+{
+    return get_va_block(hdev, ctx->va_range[type], size, 0,
+            max(alignment, ctx->va_range[type]->page_size));
+}
+
+/**
+ * hl_get_va_range_type() - get va_range type for the given address and size.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @type: returned va_range type
+ *
+ * Return: true if the area is inside a valid range, false otherwise.
+ */
+static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
+            enum hl_va_range_type *type)
+{
+    int i;
+
+    for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) {
+        if (hl_mem_area_inside_range(address, size,
+                ctx->va_range[i]->start_addr,
+                ctx->va_range[i]->end_addr)) {
+            *type = i;
+            return 0;
+        }
+    }
+
+    return -EINVAL;
+}
+
+/*
+ * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block
+ *
+ * @hdev: pointer to the habanalabs device structure
+ * @ctx: current context
+ * @start: start virtual address
+ * @end: end virtual address
+ *
+ * This function does the following:
+ * - Takes the list lock and calls add_va_block_locked
+ */
+int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
+        u64 start_addr, u64 size)
+{
+    enum hl_va_range_type type;
+    int rc;
+
+    rc = hl_get_va_range_type(ctx, start_addr, size, &type);
+    if (rc) {
+        dev_err(hdev->dev,
+            "cannot find va_range for va %#llx size %llu",
+            start_addr, size);
+        return rc;
+    }
+
+    rc = add_va_block(hdev, ctx->va_range[type], start_addr,
+            start_addr + size - 1);
+    if (rc)
+        dev_warn(hdev->dev,
+            "add va block failed for vaddr: 0x%llx\n", start_addr);
+
+    return rc;
+}
+
+/*
 * get_sg_info - get number of pages and the DMA address from SG list
 *
 * @sg : the SG list
@@ -742,7 +843,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
    for (i = 0 ; i < phys_pg_pack->npages ; i++) {
        paddr = phys_pg_pack->pages[i];
 
-        rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+        rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
                (i + 1) == phys_pg_pack->npages);
        if (rc) {
            dev_err(hdev->dev,
@@ -761,7 +862,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
    next_vaddr = vaddr;
    for (i = 0 ; i < mapped_pg_cnt ; i++) {
-        if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+        if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
                    (i + 1) == mapped_pg_cnt))
            dev_warn_ratelimited(hdev->dev,
                "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
@@ -791,7 +892,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
    next_vaddr = vaddr;
 
    for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-        if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+        if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
                    (i + 1) == phys_pg_pack->npages))
            dev_warn_ratelimited(hdev->dev,
                "unmap failed for vaddr: 0x%llx\n", next_vaddr);
@@ -888,7 +989,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
        /* get required alignment */
        if (phys_pg_pack->page_size == page_size) {
-            va_range = ctx->host_va_range;
+            va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
 
            /*
             * huge page alignment may be needed in case of regular
@@ -903,7 +1004,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
             * huge page alignment is needed in case of huge page
             * mapping
             */
-            va_range = ctx->host_huge_va_range;
+            va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
            va_block_align = huge_page_size;
        }
    } else {
@@ -928,7 +1029,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        hint_addr = args->map_device.hint_addr;
 
        /* DRAM VA alignment is the same as the DRAM page size */
-        va_range = ctx->dram_va_range;
+        va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
        va_block_align = hdev->asic_prop.dmmu.page_size;
    }
 
@@ -1073,12 +1174,12 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
 
        if (phys_pg_pack->page_size ==
                    hdev->asic_prop.pmmu.page_size)
-            va_range = ctx->host_va_range;
+            va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
        else
-            va_range = ctx->host_huge_va_range;
+            va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
    } else if (*vm_type == VM_TYPE_PHYS_PACK) {
        is_userptr = false;
-        va_range = ctx->dram_va_range;
+        va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
        phys_pg_pack = hnode->ptr;
    } else {
        dev_warn(hdev->dev,
@@ -1217,6 +1318,7 @@ out:
 
 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 {
+    enum hl_device_status status;
    union hl_mem_args *args = data;
    struct hl_device *hdev = hpriv->hdev;
    struct hl_ctx *ctx = hpriv->ctx;
@@ -1224,10 +1326,10 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
    u32 handle = 0;
    int rc;
 
-    if (hl_device_disabled_or_in_reset(hdev)) {
+    if (!hl_device_operational(hdev, &status)) {
        dev_warn_ratelimited(hdev->dev,
            "Device is %s. Can't execute MEMORY IOCTL\n",
-            atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+            hdev->status[status]);
        return -EBUSY;
    }
 
@@ -1236,18 +1338,35 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
    switch (args->in.op) {
    case HL_MEM_OP_ALLOC:
-        if (!hdev->dram_supports_virtual_memory) {
-            dev_err(hdev->dev, "DRAM alloc is not supported\n");
-            rc = -EINVAL;
-            goto out;
-        }
-
        if (args->in.alloc.mem_size == 0) {
            dev_err(hdev->dev, "alloc size must be larger than 0\n");
            rc = -EINVAL;
            goto out;
        }
+
+        /* If DRAM does not support virtual memory the driver won't
+         * handle the allocation/freeing of that memory. However, for
+         * system administration/monitoring purposes, the driver will
+         * keep track of the amount of DRAM memory that is allocated
+         * and freed by the user. Because this code totally relies on
+         * the user's input, the driver can't ensure the validity
+         * of this accounting.
+         */
+        if (!hdev->asic_prop.dram_supports_virtual_memory) {
+            atomic64_add(args->in.alloc.mem_size,
+                    &ctx->dram_phys_mem);
+            atomic64_add(args->in.alloc.mem_size,
+                    &hdev->dram_used_mem);
+
+            dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
+            rc = 0;
+
+            memset(args, 0, sizeof(*args));
+            args->out.handle = 0;
+            goto out;
+        }
+
        rc = alloc_device_memory(ctx, &args->in, &handle);
 
        memset(args, 0, sizeof(*args));
@@ -1255,6 +1374,26 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
        break;
 
    case HL_MEM_OP_FREE:
+        /* If DRAM does not support virtual memory the driver won't
+         * handle the allocation/freeing of that memory. However, for
+         * system administration/monitoring purposes, the driver will
+         * keep track of the amount of DRAM memory that is allocated
+         * and freed by the user. Because this code totally relies on
+         * the user's input, the driver can't ensure the validity
+         * of this accounting.
+         */
+        if (!hdev->asic_prop.dram_supports_virtual_memory) {
+            atomic64_sub(args->in.alloc.mem_size,
+                    &ctx->dram_phys_mem);
+            atomic64_sub(args->in.alloc.mem_size,
+                    &hdev->dram_used_mem);
+
+            dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
+            rc = 0;
+
+            goto out;
+        }
+
        rc = free_device_memory(ctx, args->in.free.handle);
        break;
 
@@ -1498,7 +1637,7 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 * addresses.
 */
 static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
-            u64 start, u64 end)
+            u64 start, u64 end, u32 page_size)
 {
    int rc;
 
@@ -1528,6 +1667,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
 
    va_range->start_addr = start;
    va_range->end_addr = end;
+    va_range->page_size = page_size;
 
    return 0;
 }
@@ -1540,8 +1680,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
 * This function does the following:
 * - Frees the virtual addresses block list and its lock
 */
-static void va_range_fini(struct hl_device *hdev,
-        struct hl_va_range *va_range)
+static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
 {
    mutex_lock(&va_range->lock);
    clear_va_list_locked(hdev, &va_range->list);
@@ -1571,102 +1710,97 @@ static void va_range_fini(struct hl_device *hdev,
 static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
                    u64 host_range_start,
                    u64 host_range_end,
+                    u32 host_page_size,
                    u64 host_huge_range_start,
                    u64 host_huge_range_end,
+                    u32 host_huge_page_size,
                    u64 dram_range_start,
-                    u64 dram_range_end)
+                    u64 dram_range_end,
+                    u32 dram_page_size)
 {
    struct hl_device *hdev = ctx->hdev;
-    int rc;
-
-    ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
-    if (!ctx->host_va_range)
-        return -ENOMEM;
-
-    ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
-                    GFP_KERNEL);
-    if (!ctx->host_huge_va_range) {
-        rc = -ENOMEM;
-        goto host_huge_va_range_err;
-    }
-
-    ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
-    if (!ctx->dram_va_range) {
-        rc = -ENOMEM;
-        goto dram_va_range_err;
+    int i, rc;
+
+    for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
+        ctx->va_range[i] =
+            kzalloc(sizeof(struct hl_va_range), GFP_KERNEL);
+        if (!ctx->va_range[i]) {
+            rc = -ENOMEM;
+            goto free_va_range;
+        }
    }
 
    rc = hl_mmu_ctx_init(ctx);
    if (rc) {
        dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
-        goto mmu_ctx_err;
+        goto free_va_range;
    }
 
    mutex_init(&ctx->mem_hash_lock);
    hash_init(ctx->mem_hash);
 
-    mutex_init(&ctx->host_va_range->lock);
+    mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
 
-    rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
-                host_range_end);
+    rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST],
+            host_range_start, host_range_end, host_page_size);
    if (rc) {
        dev_err(hdev->dev, "failed to init host vm range\n");
-        goto host_page_range_err;
+        goto mmu_ctx_fini;
    }
 
    if (hdev->pmmu_huge_range) {
-        mutex_init(&ctx->host_huge_va_range->lock);
+        mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
 
-        rc = va_range_init(hdev, ctx->host_huge_va_range,
-                    host_huge_range_start,
-                    host_huge_range_end);
+        rc = va_range_init(hdev,
+            ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE],
+            host_huge_range_start, host_huge_range_end,
+            host_huge_page_size);
        if (rc) {
            dev_err(hdev->dev,
                "failed to init host huge vm range\n");
-            goto host_hpage_range_err;
+            goto clear_host_va_range;
        }
    } else {
-        kfree(ctx->host_huge_va_range);
-        ctx->host_huge_va_range = ctx->host_va_range;
+        kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
+        ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] =
+                ctx->va_range[HL_VA_RANGE_TYPE_HOST];
    }
 
-    mutex_init(&ctx->dram_va_range->lock);
+    mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
 
-    rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
-            dram_range_end);
+    rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM],
+            dram_range_start, dram_range_end, dram_page_size);
    if (rc) {
        dev_err(hdev->dev, "failed to init dram vm range\n");
-        goto dram_vm_err;
+        goto clear_host_huge_va_range;
    }
 
    hl_debugfs_add_ctx_mem_hash(hdev, ctx);
 
    return 0;
 
-dram_vm_err:
-    mutex_destroy(&ctx->dram_va_range->lock);
+clear_host_huge_va_range:
+    mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
 
    if (hdev->pmmu_huge_range) {
-        mutex_lock(&ctx->host_huge_va_range->lock);
-        clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
-        mutex_unlock(&ctx->host_huge_va_range->lock);
+        mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
+        clear_va_list_locked(hdev,
+            &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list);
+        mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
    }
-host_hpage_range_err:
+clear_host_va_range:
    if (hdev->pmmu_huge_range)
-        mutex_destroy(&ctx->host_huge_va_range->lock);
-    mutex_lock(&ctx->host_va_range->lock);
-    clear_va_list_locked(hdev, &ctx->host_va_range->list);
-    mutex_unlock(&ctx->host_va_range->lock);
-host_page_range_err:
-    mutex_destroy(&ctx->host_va_range->lock);
+        mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
+    mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
+    clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list);
+    mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
+mmu_ctx_fini:
+    mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
    mutex_destroy(&ctx->mem_hash_lock);
    hl_mmu_ctx_fini(ctx);
-mmu_ctx_err:
-    kfree(ctx->dram_va_range);
-dram_va_range_err:
-    kfree(ctx->host_huge_va_range);
-host_huge_va_range_err:
-    kfree(ctx->host_va_range);
+free_va_range:
+    for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++)
+        kfree(ctx->va_range[i]);
 
    return rc;
 }
@@ -1676,6 +1810,7 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
    struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
    u64 host_range_start, host_range_end, host_huge_range_start,
        host_huge_range_end, dram_range_start, dram_range_end;
+    u32 host_page_size, host_huge_page_size, dram_page_size;
 
    atomic64_set(&ctx->dram_phys_mem, 0);
 
@@ -1686,27 +1821,23 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
     * In case of DRAM mapping, the returned address is the physical
     * address of the memory related to the given handle.
     */
-    if (ctx->hdev->mmu_enable) {
-        dram_range_start = prop->dmmu.start_addr;
-        dram_range_end = prop->dmmu.end_addr;
-        host_range_start = prop->pmmu.start_addr;
-        host_range_end = prop->pmmu.end_addr;
-        host_huge_range_start = prop->pmmu_huge.start_addr;
-        host_huge_range_end = prop->pmmu_huge.end_addr;
-    } else {
-        dram_range_start = prop->dram_user_base_address;
-        dram_range_end = prop->dram_end_address;
-        host_range_start = prop->dram_user_base_address;
-        host_range_end = prop->dram_end_address;
-        host_huge_range_start = prop->dram_user_base_address;
-        host_huge_range_end = prop->dram_end_address;
-    }
+    if (!ctx->hdev->mmu_enable)
+        return 0;
+
+    dram_range_start = prop->dmmu.start_addr;
+    dram_range_end = prop->dmmu.end_addr;
+    dram_page_size = prop->dmmu.page_size;
+    host_range_start = prop->pmmu.start_addr;
+    host_range_end = prop->pmmu.end_addr;
+    host_page_size = prop->pmmu.page_size;
+    host_huge_range_start = prop->pmmu_huge.start_addr;
+    host_huge_range_end = prop->pmmu_huge.end_addr;
+    host_huge_page_size = prop->pmmu_huge.page_size;
 
    return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
-                    host_huge_range_start,
-                    host_huge_range_end,
-                    dram_range_start,
-                    dram_range_end);
+            host_page_size, host_huge_range_start,
+            host_huge_range_end, host_huge_page_size,
+            dram_range_start, dram_range_end, dram_page_size);
 }
 
 /*
@@ -1738,6 +1869,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
    struct hlist_node *tmp_node;
    int i;
 
+    if (!ctx->hdev->mmu_enable)
+        return;
+
    hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
 
    /*
@@ -1772,13 +1906,21 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
    }
    spin_unlock(&vm->idr_lock);
 
-    va_range_fini(hdev, ctx->dram_va_range);
+    va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
+    va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
+
    if (hdev->pmmu_huge_range)
-        va_range_fini(hdev, ctx->host_huge_va_range);
-    va_range_fini(hdev, ctx->host_va_range);
+        va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
 
    mutex_destroy(&ctx->mem_hash_lock);
    hl_mmu_ctx_fini(ctx);
+
+    /* In this case we need to clear the global accounting of DRAM usage
+     * because the user notifies us on allocations. If the user is no more,
+     * all DRAM is available
+     */
+    if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
+        atomic64_set(&ctx->hdev->dram_used_mem, 0);
 }
 
 /*
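A minimal usage sketch of the new reservation helpers introduced by this patch, written as a hypothetical in-driver caller: only hl_reserve_va_block(), hl_unreserve_va_block(), the ctx->va_range[] array and the HL_VA_RANGE_TYPE_* indices come from the patch itself; the wrapper function name and the SZ_2M request size (from <linux/sizes.h>) are illustrative assumptions, not part of the commit.

/*
 * Hypothetical caller (not part of this commit): reserve a block of device
 * virtual addresses in the DRAM range, use it, then return it to the free
 * list of that range.
 */
static int example_reserve_dram_va(struct hl_device *hdev, struct hl_ctx *ctx)
{
    u64 va;

    /* Alignment 0 falls back to the range's page size inside the helper,
     * via max(alignment, ctx->va_range[type]->page_size); a zero return
     * is treated here as "no suitable block found".
     */
    va = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_DRAM, SZ_2M, 0);
    if (!va)
        return -ENOMEM;

    /* ... map something into [va, va + SZ_2M) ... */

    /* The unreserve side looks up the owning range by address and size
     * (hl_get_va_range_type()), so the caller does not pass the range
     * type back.
     */
    return hl_unreserve_va_block(hdev, ctx, va, SZ_2M);
}

Collapsing the separate host, host-huge and DRAM pointers into the ctx->va_range[] array indexed by enum hl_va_range_type is what lets this lookup, as well as the allocation and cleanup paths in vm_ctx_init_with_ranges(), iterate over HL_VA_RANGE_TYPE_MAX instead of special-casing each range.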