From 1522ac3ec95ff0230e7aa516f86b674fdf72866c Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 12 Mar 2009 17:03:48 +0000 Subject: [ARM] Fix virtual to physical translation macro corner cases The current use of these macros works well when the conversion is entirely linear. In this case, we can be assured that the following holds true: __va(p + s) - s = __va(p) However, this is not always the case, especially when there is a non-linear conversion (eg, when there is a 3.5GB hole in memory.) In this case, if 's' is the size of the region (eg, PAGE_SIZE) and 'p' is the final page, the above is most definitely not true. So, we must ensure that __va() and __pa() are only used with valid kernel direct mapped RAM addresses. This patch tweaks the code to achieve this. Tested-by: Charles Moschel Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 20 ++++++++++++-------- arch/arm/mm/init.c | 2 +- arch/arm/mm/mmap.c | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 310e479309ef..f1ef5613ccd4 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -490,26 +490,30 @@ core_initcall(consistent_init); */ void dma_cache_maint(const void *start, size_t size, int direction) { - const void *end = start + size; + void (*inner_op)(const void *, const void *); + void (*outer_op)(unsigned long, unsigned long); - BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1)); + BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1)); switch (direction) { case DMA_FROM_DEVICE: /* invalidate only */ - dmac_inv_range(start, end); - outer_inv_range(__pa(start), __pa(end)); + inner_op = dmac_inv_range; + outer_op = outer_inv_range; break; case DMA_TO_DEVICE: /* writeback only */ - dmac_clean_range(start, end); - outer_clean_range(__pa(start), __pa(end)); + inner_op = dmac_clean_range; + outer_op = outer_clean_range; break; case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - dmac_flush_range(start, end); - outer_flush_range(__pa(start), __pa(end)); + inner_op = dmac_flush_range; + outer_op = outer_flush_range; break; default: BUG(); } + + inner_op(start, start + size); + outer_op(__pa(start), __pa(start) + size); } EXPORT_SYMBOL(dma_cache_maint); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 34df4d9d03a6..80fd3b69ae1f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -382,7 +382,7 @@ void __init bootmem_init(void) for_each_node(node) bootmem_free_node(node, mi); - high_memory = __va(memend_pfn << PAGE_SHIFT); + high_memory = __va((memend_pfn << PAGE_SHIFT) - 1) + 1; /* * This doesn't seem to be used by the Linux memory manager any diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 5358fcc7f61e..f7457fea6de8 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -124,7 +124,7 @@ int valid_phys_addr_range(unsigned long addr, size_t size) { if (addr < PHYS_OFFSET) return 0; - if (addr + size > __pa(high_memory)) + if (addr + size >= __pa(high_memory - 1)) return 0; return 1; -- cgit v1.2.3 From 5f0fbf9ecaf354fa4bbf266fffdea2ea3d14a0ed Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 16 Sep 2008 13:05:53 -0400 Subject: [ARM] fixmap support This is the minimum fixmap interface expected to be implemented by architectures supporting highmem. We have a second level page table already allocated and covering 0xfff00000-0xffffffff because the exception vector page is located at 0xffff0000, and various cache tricks already use some entries above 0xffff0000. Therefore the PTEs covering 0xfff00000-0xfffeffff are free to be used. However the XScale cache flushing code already uses virtual addresses between 0xfffe0000 and 0xfffeffff. So this reserves the 0xfff00000-0xfffdffff range for fixmap stuff. The Documentation/arm/memory.txt information is updated accordingly, including the information about the actual top of DMA memory mapping region which didn't match the code. Signed-off-by: Nicolas Pitre --- Documentation/arm/memory.txt | 9 ++++++++- arch/arm/include/asm/fixmap.h | 41 +++++++++++++++++++++++++++++++++++++++++ arch/arm/mm/mm.h | 3 +-- 3 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 arch/arm/include/asm/fixmap.h (limited to 'arch/arm/mm') diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt index dc6045577a8b..43cb1004d35f 100644 --- a/Documentation/arm/memory.txt +++ b/Documentation/arm/memory.txt @@ -29,7 +29,14 @@ ffff0000 ffff0fff CPU vector page. CPU supports vector relocation (control register V bit.) -ffc00000 fffeffff DMA memory mapping region. Memory returned +fffe0000 fffeffff XScale cache flush area. This is used + in proc-xscale.S to flush the whole data + cache. Free for other usage on non-XScale. + +fff00000 fffdffff Fixmap mapping region. Addresses provided + by fix_to_virt() will be located here. + +ffc00000 ffefffff DMA memory mapping region. Memory returned by the dma_alloc_xxx functions will be dynamically mapped here. diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h new file mode 100644 index 000000000000..bbae919bceb4 --- /dev/null +++ b/arch/arm/include/asm/fixmap.h @@ -0,0 +1,41 @@ +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +/* + * Nothing too fancy for now. + * + * On ARM we already have well known fixed virtual addresses imposed by + * the architecture such as the vector page which is located at 0xffff0000, + * therefore a second level page table is already allocated covering + * 0xfff00000 upwards. + * + * The cache flushing code in proc-xscale.S uses the virtual area between + * 0xfffe0000 and 0xfffeffff. + */ + +#define FIXADDR_START 0xfff00000UL +#define FIXADDR_TOP 0xfffe0000UL +#define FIXADDR_SIZE (FIXADDR_TOP - FIXADDR_START) + +#define FIX_KMAP_BEGIN 0 +#define FIX_KMAP_END (FIXADDR_SIZE >> PAGE_SHIFT) + +#define __fix_to_virt(x) (FIXADDR_START + ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) (((x) - FIXADDR_START) >> PAGE_SHIFT) + +extern void __this_fixmap_does_not_exist(void); + +static inline unsigned long fix_to_virt(const unsigned int idx) +{ + if (idx >= FIX_KMAP_END) + __this_fixmap_does_not_exist(); + return __fix_to_virt(idx); +} + +static inline unsigned int virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +#endif diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 95bbe112965e..c4f6f05198e0 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -1,7 +1,6 @@ -/* the upper-most page table pointer */ - #ifdef CONFIG_MMU +/* the upper-most page table pointer */ extern pmd_t *top_pmd; #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -- cgit v1.2.3 From d73cd42893f4cdc06e6829fea2347bb92cb789d1 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 Sep 2008 16:44:55 -0400 Subject: [ARM] kmap support The kmap virtual area borrows a 2MB range at the top of the 16MB area below PAGE_OFFSET currently reserved for kernel modules and/or the XIP kernel. This 2MB corresponds to the range covered by 2 consecutive second-level page tables, or a single pmd entry as seen by the Linux page table abstraction. Because XIP kernels are unlikely to be seen on systems needing highmem support, there shouldn't be any shortage of VM space for modules (14 MB for modules is still way more than twice the typical usage). Because the virtual mapping of highmem pages can go away at any moment after kunmap() is called on them, we need to bypass the delayed cache flushing provided by flush_dcache_page() in that case. The atomic kmap versions are based on fixmaps, and __cpuc_flush_dcache_page() is used directly in that case. Signed-off-by: Nicolas Pitre --- arch/arm/include/asm/highmem.h | 28 ++++++++++ arch/arm/include/asm/memory.h | 13 +++-- arch/arm/mm/Makefile | 1 + arch/arm/mm/flush.c | 2 +- arch/arm/mm/highmem.c | 116 +++++++++++++++++++++++++++++++++++++++++ arch/arm/mm/mmu.c | 13 +++++ 6 files changed, 169 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/highmem.h create mode 100644 arch/arm/mm/highmem.c (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h new file mode 100644 index 000000000000..023d5b374544 --- /dev/null +++ b/arch/arm/include/asm/highmem.h @@ -0,0 +1,28 @@ +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#include + +#define PKMAP_BASE (PAGE_OFFSET - PMD_SIZE) +#define LAST_PKMAP PTRS_PER_PTE +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +#define kmap_prot PAGE_KERNEL + +#define flush_cache_kmaps() flush_cache_all() + +extern pte_t *pkmap_page_table; + +extern void *kmap_high(struct page *page); +extern void kunmap_high(struct page *page); + +extern void *kmap(struct page *page); +extern void kunmap(struct page *page); +extern void *kmap_atomic(struct page *page, enum km_type type); +extern void kunmap_atomic(void *kvaddr, enum km_type type); +extern void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); +extern struct page *kmap_atomic_to_page(const void *ptr); + +#endif diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 0202a7c20e62..ae472bc376d3 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -44,13 +44,20 @@ * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 32MB of the kernel text. */ -#define MODULES_END (PAGE_OFFSET) -#define MODULES_VADDR (MODULES_END - 16*1048576) - +#define MODULES_VADDR (PAGE_OFFSET - 16*1024*1024) #if TASK_SIZE > MODULES_VADDR #error Top of user space clashes with start of module space #endif +/* + * The highmem pkmap virtual space shares the end of the module area. + */ +#ifdef CONFIG_HIGHMEM +#define MODULES_END (PAGE_OFFSET - PMD_SIZE) +#else +#define MODULES_END (PAGE_OFFSET) +#endif + /* * The XIP kernel gets mapped at the bottom of the module vm area. * Since we use sections to map it, this macro replaces the physical address diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611a..185e7dc7dcf2 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 0fa9bf388f0b..4e283481cee1 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -192,7 +192,7 @@ void flush_dcache_page(struct page *page) struct address_space *mapping = page_mapping(page); #ifndef CONFIG_SMP - if (mapping && !mapping_mapped(mapping)) + if (!PageHighMem(page) && mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else #endif diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c new file mode 100644 index 000000000000..a34954d9df7d --- /dev/null +++ b/arch/arm/mm/highmem.c @@ -0,0 +1,116 @@ +/* + * arch/arm/mm/highmem.c -- ARM highmem support + * + * Author: Nicolas Pitre + * Created: september 8, 2008 + * Copyright: Marvell Semiconductors Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include "mm.h" + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +void *kmap_atomic(struct page *page, enum km_type type) +{ + unsigned int idx; + unsigned long vaddr; + + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * With debugging enabled, kunmap_atomic forces that entry to 0. + * Make sure it was indeed properly unmapped. + */ + BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); +#endif + set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0); + /* + * When debugging is off, kunmap_atomic leaves the previous mapping + * in place, so this TLB flush ensures the TLB is updated with the + * new mapping. + */ + local_flush_tlb_kernel_page(vaddr); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void kunmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); + + if (kvaddr >= (void *)FIXADDR_START) { + __cpuc_flush_dcache_page((void *)vaddr); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); + local_flush_tlb_kernel_page(vaddr); +#else + (void) idx; /* to kill a warning */ +#endif + } + pagefault_enable(); +} +EXPORT_SYMBOL(kunmap_atomic); + +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + unsigned int idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); +#endif + set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0); + local_flush_tlb_kernel_page(vaddr); + + return (void *)vaddr; +} + +struct page *kmap_atomic_to_page(const void *ptr) +{ + unsigned long vaddr = (unsigned long)ptr; + pte_t *pte; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + pte = TOP_PTE(vaddr); + return pte_page(*pte); +} diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index d4d082c5c2d4..4810a4c9ffce 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -895,6 +896,17 @@ static void __init devicemaps_init(struct machine_desc *mdesc) flush_cache_all(); } +static void __init kmap_init(void) +{ +#ifdef CONFIG_HIGHMEM + pmd_t *pmd = pmd_off_k(PKMAP_BASE); + pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t)); + BUG_ON(!pmd_none(*pmd) || !pte); + __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE); + pkmap_page_table = pte + PTRS_PER_PTE; +#endif +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -908,6 +920,7 @@ void __init paging_init(struct machine_desc *mdesc) prepare_page_table(); bootmem_init(); devicemaps_init(mdesc); + kmap_init(); top_pmd = pmd_off_k(0xffff0000); -- cgit v1.2.3 From 3835f6cb645bdb9a58aa6e062fe1d5777f1a9748 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 17 Sep 2008 15:21:55 -0400 Subject: [ARM] mem_init(): make highmem pages available for use Signed-off-by: Nicolas Pitre --- arch/arm/mm/init.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 80fd3b69ae1f..8277802ec859 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -485,7 +486,7 @@ void __init mem_init(void) int i, node; #ifndef CONFIG_DISCONTIGMEM - max_mapnr = virt_to_page(high_memory) - mem_map; + max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #endif /* this will put all unused low memory onto the freelists */ @@ -504,6 +505,19 @@ void __init mem_init(void) __phys_to_pfn(__pa(swapper_pg_dir)), NULL); #endif +#ifdef CONFIG_HIGHMEM + /* set highmem page free */ + for_each_online_node(node) { + for_each_nodebank (i, &meminfo, node) { + unsigned long start = bank_pfn_start(&meminfo.bank[i]); + unsigned long end = bank_pfn_end(&meminfo.bank[i]); + if (start >= max_low_pfn + PHYS_PFN_OFFSET) + totalhigh_pages += free_area(start, end, NULL); + } + } + totalram_pages += totalhigh_pages; +#endif + /* * Since our memory may not be contiguous, calculate the * real number of pages we have in this system @@ -521,9 +535,10 @@ void __init mem_init(void) initsize = __init_end - __init_begin; printk(KERN_NOTICE "Memory: %luKB available (%dK code, " - "%dK data, %dK init)\n", + "%dK data, %dK init, %luK highmem)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - codesize >> 10, datasize >> 10, initsize >> 10); + codesize >> 10, datasize >> 10, initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; -- cgit v1.2.3 From 43377453af83b8ff8c1c731da1508bd6b84ebfea Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 12 Mar 2009 22:52:09 -0400 Subject: [ARM] introduce dma_cache_maint_page() This is a helper to be used by the DMA mapping API to handle cache maintenance for memory identified by a page structure instead of a virtual address. Those pages may or may not be highmem pages, and when they're highmem pages, they may or may not be virtually mapped. When they're not mapped then there is no L1 cache to worry about. But even in that case the L2 cache must be processed since unmapped highmem pages can still be L2 cached. Signed-off-by: Nicolas Pitre --- arch/arm/include/asm/dma-mapping.h | 4 ++- arch/arm/include/asm/highmem.h | 3 ++ arch/arm/mm/dma-mapping.c | 72 +++++++++++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 22cb14ec3438..59fa762e9c66 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -57,6 +57,8 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) * Use the driver DMA support - see dma-mapping.h (dma_sync_*) */ extern void dma_cache_maint(const void *kaddr, size_t size, int rw); +extern void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, int rw); /* * Return whether the given device DMA address mask can be supported @@ -316,7 +318,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, BUG_ON(!valid_dma_direction(dir)); if (!arch_is_coherent()) - dma_cache_maint(page_address(page) + offset, size, dir); + dma_cache_maint_page(page, offset, size, dir); return page_to_dma(dev, page) + offset; } diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 023d5b374544..7f36d00600b4 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -15,7 +15,10 @@ extern pte_t *pkmap_page_table; +#define ARCH_NEEDS_KMAP_HIGH_GET + extern void *kmap_high(struct page *page); +extern void *kmap_high_get(struct page *page); extern void kunmap_high(struct page *page); extern void *kmap(struct page *page); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f1ef5613ccd4..510c179b0ac8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -517,6 +518,74 @@ void dma_cache_maint(const void *start, size_t size, int direction) } EXPORT_SYMBOL(dma_cache_maint); +static void dma_cache_maint_contiguous(struct page *page, unsigned long offset, + size_t size, int direction) +{ + void *vaddr; + unsigned long paddr; + void (*inner_op)(const void *, const void *); + void (*outer_op)(unsigned long, unsigned long); + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + inner_op = dmac_inv_range; + outer_op = outer_inv_range; + break; + case DMA_TO_DEVICE: /* writeback only */ + inner_op = dmac_clean_range; + outer_op = outer_clean_range; + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + inner_op = dmac_flush_range; + outer_op = outer_flush_range; + break; + default: + BUG(); + } + + if (!PageHighMem(page)) { + vaddr = page_address(page) + offset; + inner_op(vaddr, vaddr + size); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + vaddr += offset; + inner_op(vaddr, vaddr + size); + kunmap_high(page); + } + } + + paddr = page_to_phys(page) + offset; + outer_op(paddr, paddr + size); +} + +void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, int dir) +{ + /* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ + size_t left = size; + do { + size_t len = left; + if (PageHighMem(page) && len + offset > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + } + len = PAGE_SIZE - offset; + } + dma_cache_maint_contiguous(page, offset, len, dir); + offset = 0; + page++; + left -= len; + } while (left); +} +EXPORT_SYMBOL(dma_cache_maint_page); + /** * dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -614,7 +683,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, continue; if (!arch_is_coherent()) - dma_cache_maint(sg_virt(s), s->length, dir); + dma_cache_maint_page(sg_page(s), s->offset, + s->length, dir); } } EXPORT_SYMBOL(dma_sync_sg_for_device); -- cgit v1.2.3 From 1bb772679ffb0ba1ff1d40d8c6b855ab029f177d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 12 Sep 2008 16:11:51 -0400 Subject: [ARM] Feroceon: add highmem support to L2 cache handling code The choice is between looping over the physical range and performing single cache line operations, or to map highmem pages somewhere, as cache range ops are possible only on virtual addresses. Because L2 range ops are much faster, we go with the later by factoring the physical-to-virtual address conversion and use a fixmap entry for it in the HIGHMEM case. Possible future optimizations to avoid the pte setup cost: - do the pte setup for highmem pages only - determine a threshold for doing a line-by-line processing on physical addresses when the range is small Signed-off-by: Nicolas Pitre --- arch/arm/include/asm/kmap_types.h | 1 + arch/arm/mm/cache-feroceon-l2.c | 54 +++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 17 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index 45def13ee17a..d16ec97ec9a9 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -18,6 +18,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_L2_CACHE, KM_TYPE_NR }; diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 80cd207cbaea..d6dd83826f8a 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -14,8 +14,12 @@ #include #include +#include +#include +#include +#include #include - +#include "mm.h" /* * Low-level cache maintenance operations. @@ -34,14 +38,36 @@ * The range operations require two successive cp15 writes, in * between which we don't want to be preempted. */ + +static inline unsigned long l2_start_va(unsigned long paddr) +{ +#ifdef CONFIG_HIGHMEM + /* + * Let's do our own fixmap stuff in a minimal way here. + * Because range ops can't be done on physical addresses, + * we simply install a virtual mapping for it only for the + * TLB lookup to occur, hence no need to flush the untouched + * memory mapping. This is protected with the disabling of + * interrupts by the caller. + */ + unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(vaddr); + return vaddr + (paddr & ~PAGE_MASK); +#else + return __phys_to_virt(paddr); +#endif +} + static inline void l2_clean_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); } -static inline void l2_clean_mva_range(unsigned long start, unsigned long end) +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) { - unsigned long flags; + unsigned long va_start, va_end, flags; /* * Make sure 'start' and 'end' reference the same page, as @@ -51,17 +77,14 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end) BUG_ON((start ^ end) >> PAGE_SHIFT); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" "mcr p15, 1, %1, c15, c9, 5" - : : "r" (start), "r" (end)); + : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); } -static inline void l2_clean_pa_range(unsigned long start, unsigned long end) -{ - l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end)); -} - static inline void l2_clean_inv_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); @@ -72,9 +95,9 @@ static inline void l2_inv_pa(unsigned long addr) __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); } -static inline void l2_inv_mva_range(unsigned long start, unsigned long end) +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) { - unsigned long flags; + unsigned long va_start, va_end, flags; /* * Make sure 'start' and 'end' reference the same page, as @@ -84,17 +107,14 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end) BUG_ON((start ^ end) >> PAGE_SHIFT); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" "mcr p15, 1, %1, c15, c11, 5" - : : "r" (start), "r" (end)); + : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); } -static inline void l2_inv_pa_range(unsigned long start, unsigned long end) -{ - l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end)); -} - /* * Linux primitives. -- cgit v1.2.3 From 3902a15e784e9b1efa8e6ad246489c609e0ef880 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 18 Sep 2008 22:55:47 -0400 Subject: [ARM] xsc3: add highmem support to L2 cache handling code On xsc3, L2 cache ops are possible only on virtual addresses. The code is rearranged so to have a linear progression requiring the least amount of pte setups in the highmem case. To protect the virtual mapping so created, interrupts must be disabled currently up to a page worth of address range. The interrupt disabling is done in a way to minimize the overhead within the inner loop. The alternative would consist in separate code for the highmem and non highmem compilation which is less preferable. Signed-off-by: Nicolas Pitre --- arch/arm/mm/cache-xsc3l2.c | 107 +++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 27 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index 464de893a988..5d180cb0bd94 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -17,12 +17,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -#include -#include - #include #include #include +#include +#include +#include +#include +#include "mm.h" #define CR_L2 (1 << 26) @@ -47,21 +49,11 @@ static inline void xsc3_l2_clean_mva(unsigned long addr) __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); } -static inline void xsc3_l2_clean_pa(unsigned long addr) -{ - xsc3_l2_clean_mva(__phys_to_virt(addr)); -} - static inline void xsc3_l2_inv_mva(unsigned long addr) { __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); } -static inline void xsc3_l2_inv_pa(unsigned long addr) -{ - xsc3_l2_inv_mva(__phys_to_virt(addr)); -} - static inline void xsc3_l2_inv_all(void) { unsigned long l2ctype, set_way; @@ -79,50 +71,103 @@ static inline void xsc3_l2_inv_all(void) dsb(); } +#ifdef CONFIG_HIGHMEM +#define l2_map_save_flags(x) raw_local_save_flags(x) +#define l2_map_restore_flags(x) raw_local_irq_restore(x) +#else +#define l2_map_save_flags(x) ((x) = 0) +#define l2_map_restore_flags(x) ((void)(x)) +#endif + +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, + unsigned long flags) +{ +#ifdef CONFIG_HIGHMEM + unsigned long va = prev_va & PAGE_MASK; + unsigned long pa_offset = pa << (32 - PAGE_SHIFT); + if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) { + /* + * Switching to a new page. Because cache ops are + * using virtual addresses only, we must put a mapping + * in place for it. We also enable interrupts for a + * short while and disable them again to protect this + * mapping. + */ + unsigned long idx; + raw_local_irq_restore(flags); + idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + va = __fix_to_virt(FIX_KMAP_BEGIN + idx); + raw_local_irq_restore(flags | PSR_I_BIT); + set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(va); + } + return va + (pa_offset >> (32 - PAGE_SHIFT)); +#else + return __phys_to_virt(pa); +#endif +} + static void xsc3_l2_inv_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + if (start == 0 && end == -1ul) { xsc3_l2_inv_all(); return; } + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + /* * Clean and invalidate partial first cache line. */ if (start & (CACHE_LINE_SIZE - 1)) { - xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1)); - xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); start = (start | (CACHE_LINE_SIZE - 1)) + 1; } /* - * Clean and invalidate partial last cache line. + * Invalidate all full cache lines between 'start' and 'end'. */ - if (start < end && (end & (CACHE_LINE_SIZE - 1))) { - xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1)); - xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); - end &= ~(CACHE_LINE_SIZE - 1); + while (start < (end & ~(CACHE_LINE_SIZE - 1))) { + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; } /* - * Invalidate all full cache lines between 'start' and 'end'. + * Clean and invalidate partial last cache line. */ - while (start < end) { - xsc3_l2_inv_pa(start); - start += CACHE_LINE_SIZE; + if (start < end) { + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); } + l2_map_restore_flags(flags); + dsb(); } static void xsc3_l2_clean_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - xsc3_l2_clean_pa(start); + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); start += CACHE_LINE_SIZE; } + l2_map_restore_flags(flags); + dsb(); } @@ -148,18 +193,26 @@ static inline void xsc3_l2_flush_all(void) static void xsc3_l2_flush_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + if (start == 0 && end == -1ul) { xsc3_l2_flush_all(); return; } + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - xsc3_l2_clean_pa(start); - xsc3_l2_inv_pa(start); + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } + l2_map_restore_flags(flags); + dsb(); } -- cgit v1.2.3 From 3f973e22160257c5bda85815be5b1540d391a671 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 4 Nov 2008 00:48:42 -0500 Subject: [ARM] ignore high memory with VIPT aliasing caches VIPT aliasing caches have issues of their own which are not yet handled. Usage of discard_old_kernel_data() in copypage-v6.c is not highmem ready, kmap/fixmap stuff doesn't take account of cache colouring, etc. If/when those issues are handled then this could be reverted. Signed-off-by: Nicolas Pitre --- arch/arm/mm/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4810a4c9ffce..cf504885a5fb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -678,6 +679,10 @@ static void __init sanity_check_meminfo(void) if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); + } else if (cache_is_vipt_aliasing()) { + printk(KERN_CRIT "HIGHMEM is not yet supported " + "with VIPT aliasing cache, " + "ignoring high memory\n"); } else { memmove(bank + 1, bank, (meminfo.nr_banks - i) * sizeof(*bank)); -- cgit v1.2.3