diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-03 10:34:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-03 10:34:44 -0700 |
commit | 3de18c865f504ab59ed2588b1e11acd4bcb9ea09 (patch) | |
tree | a718decfd27fef64f8f922eb210c99228e890632 /kernel/dma | |
parent | 14726903c835101cd8d0a703b609305094350d61 (diff) | |
parent | f3c4b1341e8320e63f197a554fc5a25686a11d22 (diff) | |
download | linux-3de18c865f504ab59ed2588b1e11acd4bcb9ea09.tar.bz2 |
Merge branch 'stable/for-linus-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb
Pull swiotlb updates from Konrad Rzeszutek Wilk:
"A new feature called restricted DMA pools. It allows SWIOTLB to
utilize per-device (or per-platform) allocated memory pools instead of
using the global one.
The first big user of this is ARM Confidential Computing where the
memory for DMA operations can be set per platform"
* 'stable/for-linus-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb: (23 commits)
swiotlb: use depends on for DMA_RESTRICTED_POOL
of: restricted dma: Don't fail device probe on rmem init failure
of: Move of_dma_set_restricted_buffer() into device.c
powerpc/svm: Don't issue ultracalls if !mem_encrypt_active()
s390/pv: fix the forcing of the swiotlb
swiotlb: Free tbl memory in swiotlb_exit()
swiotlb: Emit diagnostic in swiotlb_exit()
swiotlb: Convert io_default_tlb_mem to static allocation
of: Return success from of_dma_set_restricted_buffer() when !OF_ADDRESS
swiotlb: add overflow checks to swiotlb_bounce
swiotlb: fix implicit debugfs declarations
of: Add plumbing for restricted DMA pool
dt-bindings: of: Add restricted DMA pool
swiotlb: Add restricted DMA pool initialization
swiotlb: Add restricted DMA alloc/free support
swiotlb: Refactor swiotlb_tbl_unmap_single
swiotlb: Move alloc_size to swiotlb_find_slots
swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing
swiotlb: Update is_swiotlb_active to add a struct device argument
swiotlb: Update is_swiotlb_buffer to add a struct device argument
...
Diffstat (limited to 'kernel/dma')
-rw-r--r-- | kernel/dma/Kconfig | 13 | ||||
-rw-r--r-- | kernel/dma/direct.c | 57 | ||||
-rw-r--r-- | kernel/dma/direct.h | 8 | ||||
-rw-r--r-- | kernel/dma/swiotlb.c | 352 |
4 files changed, 319 insertions, 111 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 725cfd51762b..1b02179758cb 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -80,6 +80,19 @@ config SWIOTLB bool select NEED_DMA_MAP_STATE +config DMA_RESTRICTED_POOL + bool "DMA Restricted Pool" + depends on OF && OF_RESERVED_MEM && SWIOTLB + help + This enables support for restricted DMA pools which provide a level of + DMA memory protection on systems with limited hardware protection + capabilities, such as those lacking an IOMMU. + + For more information see + <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt> + and <kernel/dma/swiotlb.c>. + If unsure, say "n". + # # Should be selected if we can mmap non-coherent mappings to userspace. # The only thing that is really required is a way to set an uncached bit diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8dca4f97d12d..4c6c5e0635e3 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -75,6 +75,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } +static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) +{ + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + swiotlb_free(dev, page, size)) + return; + dma_free_contiguous(dev, page, size); +} + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -86,6 +95,16 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); + if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) && + is_swiotlb_for_alloc(dev)) { + page = swiotlb_alloc(dev, size); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __dma_direct_free_pages(dev, page, size); + return NULL; + } + return page; + } + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); @@ -142,7 +161,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; @@ -157,7 +176,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && - !dev_is_dma_coherent(dev)) + !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && @@ -167,11 +187,16 @@ void *dma_direct_alloc(struct device *dev, size_t size, /* * Remapping or decrypting memory may block. If either is required and * we can't block, allocate the memory from the atomic pools. + * If restricted DMA (i.e., is_swiotlb_for_alloc) is required, one must + * set up another device coherent pool by shared-dma-pool and use + * dma_alloc_from_dev_coherent instead. */ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && !gfpflags_allow_blocking(gfp) && (force_dma_unencrypted(dev) || - (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev)))) + (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev))) && + !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ @@ -242,7 +267,7 @@ out_encrypt_pages: return NULL; } out_free_pages: - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); return NULL; } @@ -252,7 +277,7 @@ void dma_direct_free(struct device *dev, size_t size, unsigned int page_order = get_order(size); if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ dma_free_contiguous(dev, cpu_addr, size); return; @@ -261,7 +286,8 @@ void dma_direct_free(struct device *dev, size_t size, if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && - !dev_is_dma_coherent(dev)) { + !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) { arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); return; } @@ -286,7 +312,7 @@ void dma_direct_free(struct device *dev, size_t size, else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size); - dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); + __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } struct page *dma_direct_alloc_pages(struct device *dev, size_t size, @@ -296,7 +322,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, void *ret; if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && - force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) && + !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); page = __dma_direct_alloc_pages(dev, size, gfp); @@ -323,7 +350,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; out_free_pages: - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); return NULL; } @@ -342,7 +369,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, if (force_dma_unencrypted(dev)) set_memory_encrypted((unsigned long)vaddr, 1 << page_order); - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); } #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ @@ -356,7 +383,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(paddr))) + if (unlikely(is_swiotlb_buffer(dev, paddr))) swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); @@ -382,7 +409,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(paddr))) + if (unlikely(is_swiotlb_buffer(dev, paddr))) swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); @@ -510,8 +537,8 @@ int dma_direct_supported(struct device *dev, u64 mask) size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ - if (is_swiotlb_active() && - (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE)) + if (is_swiotlb_active(dev) && + (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev))) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } @@ -519,7 +546,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); + is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); } /** diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 50afc05b6f1d..4632b0f4f72e 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, { phys_addr_t paddr = dma_to_phys(dev, addr); - if (unlikely(is_swiotlb_buffer(paddr))) + if (unlikely(is_swiotlb_buffer(dev, paddr))) swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) @@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } - if (unlikely(is_swiotlb_buffer(paddr))) + if (unlikely(is_swiotlb_buffer(dev, paddr))) swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) @@ -87,7 +87,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); - if (unlikely(swiotlb_force == SWIOTLB_FORCE)) + if (is_swiotlb_force_bounce(dev)) return swiotlb_map(dev, phys, size, dir, attrs); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { @@ -113,7 +113,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); - if (unlikely(is_swiotlb_buffer(phys))) + if (unlikely(is_swiotlb_buffer(dev, phys))) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index e50df8d8f87e..87c40517e822 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -39,6 +39,13 @@ #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> #endif +#ifdef CONFIG_DMA_RESTRICTED_POOL +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/of_reserved_mem.h> +#include <linux/slab.h> +#endif #include <asm/io.h> #include <asm/dma.h> @@ -63,7 +70,7 @@ enum swiotlb_force swiotlb_force; -struct io_tlb_mem *io_tlb_default_mem; +struct io_tlb_mem io_tlb_default_mem; /* * Max segment that we can provide which (if pages are contingous) will @@ -94,7 +101,7 @@ early_param("swiotlb", setup_io_tlb_npages); unsigned int swiotlb_max_segment(void) { - return io_tlb_default_mem ? max_segment : 0; + return io_tlb_default_mem.nslabs ? max_segment : 0; } EXPORT_SYMBOL_GPL(swiotlb_max_segment); @@ -127,9 +134,9 @@ void __init swiotlb_adjust_size(unsigned long size) void swiotlb_print_info(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = &io_tlb_default_mem; - if (!mem) { + if (!mem->nslabs) { pr_warn("No low mem\n"); return; } @@ -156,11 +163,11 @@ static inline unsigned long nr_slots(u64 val) */ void __init swiotlb_update_mem_attributes(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = &io_tlb_default_mem; void *vaddr; unsigned long bytes; - if (!mem || mem->late_alloc) + if (!mem->nslabs || mem->late_alloc) return; vaddr = phys_to_virt(mem->start); bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); @@ -168,36 +175,50 @@ void __init swiotlb_update_mem_attributes(void) memset(vaddr, 0, bytes); } -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, + unsigned long nslabs, bool late_alloc) { + void *vaddr = phys_to_virt(start); unsigned long bytes = nslabs << IO_TLB_SHIFT, i; - struct io_tlb_mem *mem; - size_t alloc_size; - - if (swiotlb_force == SWIOTLB_NO_FORCE) - return 0; - - /* protect against double initialization */ - if (WARN_ON_ONCE(io_tlb_default_mem)) - return -ENOMEM; - alloc_size = PAGE_ALIGN(struct_size(mem, slots, nslabs)); - mem = memblock_alloc(alloc_size, PAGE_SIZE); - if (!mem) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); mem->nslabs = nslabs; - mem->start = __pa(tlb); + mem->start = start; mem->end = mem->start + bytes; mem->index = 0; + mem->late_alloc = late_alloc; + + if (swiotlb_force == SWIOTLB_FORCE) + mem->force_bounce = true; + spin_lock_init(&mem->lock); for (i = 0; i < mem->nslabs; i++) { mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; } + memset(vaddr, 0, bytes); +} + +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +{ + struct io_tlb_mem *mem = &io_tlb_default_mem; + size_t alloc_size; + + if (swiotlb_force == SWIOTLB_NO_FORCE) + return 0; + + /* protect against double initialization */ + if (WARN_ON_ONCE(mem->nslabs)) + return -ENOMEM; + + alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); + mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); + if (!mem->slots) + panic("%s: Failed to allocate %zu bytes align=0x%lx\n", + __func__, alloc_size, PAGE_SIZE); + + swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); - io_tlb_default_mem = mem; if (verbose) swiotlb_print_info(); swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); @@ -282,37 +303,24 @@ swiotlb_late_init_with_default_size(size_t default_size) int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) { - unsigned long bytes = nslabs << IO_TLB_SHIFT, i; - struct io_tlb_mem *mem; + struct io_tlb_mem *mem = &io_tlb_default_mem; + unsigned long bytes = nslabs << IO_TLB_SHIFT; if (swiotlb_force == SWIOTLB_NO_FORCE) return 0; /* protect against double initialization */ - if (WARN_ON_ONCE(io_tlb_default_mem)) + if (WARN_ON_ONCE(mem->nslabs)) return -ENOMEM; - mem = (void *)__get_free_pages(GFP_KERNEL, - get_order(struct_size(mem, slots, nslabs))); - if (!mem) + mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(array_size(sizeof(*mem->slots), nslabs))); + if (!mem->slots) return -ENOMEM; - mem->nslabs = nslabs; - mem->start = virt_to_phys(tlb); - mem->end = mem->start + bytes; - mem->index = 0; - mem->late_alloc = 1; - spin_lock_init(&mem->lock); - for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); - mem->slots[i].orig_addr = INVALID_PHYS_ADDR; - mem->slots[i].alloc_size = 0; - } - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); + swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true); - io_tlb_default_mem = mem; swiotlb_print_info(); swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); return 0; @@ -320,18 +328,28 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) void __init swiotlb_exit(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; - size_t size; + struct io_tlb_mem *mem = &io_tlb_default_mem; + unsigned long tbl_vaddr; + size_t tbl_size, slots_size; - if (!mem) + if (!mem->nslabs) return; - size = struct_size(mem, slots, mem->nslabs); - if (mem->late_alloc) - free_pages((unsigned long)mem, get_order(size)); - else - memblock_free_late(__pa(mem), PAGE_ALIGN(size)); - io_tlb_default_mem = NULL; + pr_info("tearing down default memory pool\n"); + tbl_vaddr = (unsigned long)phys_to_virt(mem->start); + tbl_size = PAGE_ALIGN(mem->end - mem->start); + slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs)); + + set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); + if (mem->late_alloc) { + free_pages(tbl_vaddr, get_order(tbl_size)); + free_pages((unsigned long)mem->slots, get_order(slots_size)); + } else { + memblock_free_late(mem->start, tbl_size); + memblock_free_late(__pa(mem->slots), slots_size); + } + + memset(mem, 0, sizeof(*mem)); } /* @@ -348,19 +366,33 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = phys_to_virt(tlb_addr); - unsigned int tlb_offset; + unsigned int tlb_offset, orig_addr_offset; if (orig_addr == INVALID_PHYS_ADDR) return; - tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - - swiotlb_align_offset(dev, orig_addr); + tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); + orig_addr_offset = swiotlb_align_offset(dev, orig_addr); + if (tlb_offset < orig_addr_offset) { + dev_WARN_ONCE(dev, 1, + "Access before mapping start detected. orig offset %u, requested offset %u.\n", + orig_addr_offset, tlb_offset); + return; + } + + tlb_offset -= orig_addr_offset; + if (tlb_offset > alloc_size) { + dev_WARN_ONCE(dev, 1, + "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n", + alloc_size, size, tlb_offset); + return; + } orig_addr += tlb_offset; alloc_size -= tlb_offset; @@ -426,10 +458,10 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) * Find a suitable number of IO TLB entries size that will fit this request and * allocate a buffer from that IO TLB pool. */ -static int find_slots(struct device *dev, phys_addr_t orig_addr, - size_t alloc_size) +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned long boundary_mask = dma_get_seg_boundary(dev); dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; @@ -438,6 +470,7 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr, dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; + unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned long flags; BUG_ON(!nslots); @@ -457,8 +490,9 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr, index = wrap = wrap_index(mem, ALIGN(mem->index, stride)); do { - if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != - (orig_addr & iotlb_align_mask)) { + if (orig_addr && + (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != + (orig_addr & iotlb_align_mask)) { index = wrap_index(mem, index + 1); continue; } @@ -482,8 +516,11 @@ not_found: return -1; found: - for (i = index; i < index + nslots; i++) + for (i = index; i < index + nslots; i++) { mem->slots[i].list = 0; + mem->slots[i].alloc_size = + alloc_size - (offset + ((i - index) << IO_TLB_SHIFT)); + } for (i = index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; i--) @@ -506,7 +543,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, size_t mapping_size, size_t alloc_size, enum dma_data_direction dir, unsigned long attrs) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned int i; int index; @@ -524,7 +561,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } - index = find_slots(dev, orig_addr, alloc_size + offset); + index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, @@ -538,11 +575,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nr_slots(alloc_size + offset); i++) { + for (i = 0; i < nr_slots(alloc_size + offset); i++) mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); - mem->slots[index + i].alloc_size = - alloc_size - (i << IO_TLB_SHIFT); - } tlb_addr = slot_addr(mem->start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) @@ -550,28 +584,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return tlb_addr; } -/* - * tlb_addr is the physical address of the bounce buffer to unmap. - */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t mapping_size, enum dma_data_direction dir, - unsigned long attrs) +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned long flags; - unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); + unsigned int offset = swiotlb_align_offset(dev, tlb_addr); int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; int nslots = nr_slots(mem->slots[index].alloc_size + offset); int count, i; /* - * First, sync the memory before unmapping the entry - */ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(hwdev, tlb_addr, mapping_size, DMA_FROM_DEVICE); - - /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries @@ -605,6 +627,23 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, spin_unlock_irqrestore(&mem->lock, flags); } +/* + * tlb_addr is the physical address of the bounce buffer to unmap. + */ +void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * First, sync the memory before unmapping the entry + */ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + + swiotlb_release_slots(dev, tlb_addr); +} + void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { @@ -662,26 +701,155 @@ size_t swiotlb_max_mapping_size(struct device *dev) return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE; } -bool is_swiotlb_active(void) +bool is_swiotlb_active(struct device *dev) { - return io_tlb_default_mem != NULL; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + + return mem && mem->nslabs; } EXPORT_SYMBOL_GPL(is_swiotlb_active); #ifdef CONFIG_DEBUG_FS +static struct dentry *debugfs_dir; -static int __init swiotlb_create_debugfs(void) +static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem) { - struct io_tlb_mem *mem = io_tlb_default_mem; - - if (!mem) - return 0; - mem->debugfs = debugfs_create_dir("swiotlb", NULL); debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used); +} + +static int __init swiotlb_create_default_debugfs(void) +{ + struct io_tlb_mem *mem = &io_tlb_default_mem; + + debugfs_dir = debugfs_create_dir("swiotlb", NULL); + if (mem->nslabs) { + mem->debugfs = debugfs_dir; + swiotlb_create_debugfs_files(mem); + } return 0; } -late_initcall(swiotlb_create_debugfs); +late_initcall(swiotlb_create_default_debugfs); + +#endif +#ifdef CONFIG_DMA_RESTRICTED_POOL + +#ifdef CONFIG_DEBUG_FS +static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem) +{ + struct io_tlb_mem *mem = rmem->priv; + + mem->debugfs = debugfs_create_dir(rmem->name, debugfs_dir); + swiotlb_create_debugfs_files(mem); +} +#else +static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem) +{ +} #endif + +struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + phys_addr_t tlb_addr; + int index; + + if (!mem) + return NULL; + + index = swiotlb_find_slots(dev, 0, size); + if (index == -1) + return NULL; + + tlb_addr = slot_addr(mem->start, index); + + return pfn_to_page(PFN_DOWN(tlb_addr)); +} + +bool swiotlb_free(struct device *dev, struct page *page, size_t size) +{ + phys_addr_t tlb_addr = page_to_phys(page); + + if (!is_swiotlb_buffer(dev, tlb_addr)) + return false; + + swiotlb_release_slots(dev, tlb_addr); + + return true; +} + +static int rmem_swiotlb_device_init(struct reserved_mem *rmem, + struct device *dev) +{ + struct io_tlb_mem *mem = rmem->priv; + unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; + + /* + * Since multiple devices can share the same pool, the private data, + * io_tlb_mem struct, will be initialized by the first device attached + * to it. + */ + if (!mem) { + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return -ENOMEM; + + mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs), + GFP_KERNEL); + if (!mem->slots) { + kfree(mem); + return -ENOMEM; + } + + set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), + rmem->size >> PAGE_SHIFT); + swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); + mem->force_bounce = true; + mem->for_alloc = true; + + rmem->priv = mem; + + rmem_swiotlb_debugfs_init(rmem); + } + + dev->dma_io_tlb_mem = mem; + + return 0; +} + +static void rmem_swiotlb_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev->dma_io_tlb_mem = &io_tlb_default_mem; +} + +static const struct reserved_mem_ops rmem_swiotlb_ops = { + .device_init = rmem_swiotlb_device_init, + .device_release = rmem_swiotlb_device_release, +}; + +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "linux,cma-default", NULL) || + of_get_flat_dt_prop(node, "linux,dma-default", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) { + pr_err("Restricted DMA pool must be accessible within the linear mapping."); + return -EINVAL; + } + + rmem->ops = &rmem_swiotlb_ops; + pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + return 0; +} + +RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); +#endif /* CONFIG_DMA_RESTRICTED_POOL */ |