diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 6 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 51 | ||||
-rw-r--r-- | arch/x86/mm/highmem_32.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 27 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 90 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 17 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 29 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 355 | ||||
-rw-r--r-- | arch/x86/mm/physaddr.c | 70 | ||||
-rw-r--r-- | arch/x86/mm/physaddr.h | 10 | ||||
-rw-r--r-- | arch/x86/mm/srat_32.c | 4 |
11 files changed, 425 insertions, 237 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index eefdeee8a871..9b5a9f59a478 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,5 +1,9 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o gup.o + pat.o pgtable.o physaddr.o gup.o + +# Make sure __phys_addr has no stackprotector +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_physaddr.o := $(nostackp) obj-$(CONFIG_SMP) += tlb.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bfae139182ff..775a020990a5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address, tsk->thread.screen_bitmap |= 1 << bit; } -static void dump_pagetable(unsigned long address) +static bool low_pfn(unsigned long pfn) { - __typeof__(pte_val(__pte(0))) page; + return pfn < max_low_pfn; +} - page = read_cr3(); - page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; +static void dump_pagetable(unsigned long address) +{ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(address)]; + pmd_t *pmd; + pte_t *pte; #ifdef CONFIG_X86_PAE - printk("*pdpt = %016Lx ", page); - if ((page >> PAGE_SHIFT) < max_low_pfn - && page & _PAGE_PRESENT) { - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; - printk(KERN_CONT "*pde = %016Lx ", page); - page &= ~_PAGE_NX; - } -#else - printk("*pde = %08lx ", page); + printk("*pdpt = %016Lx ", pgd_val(*pgd)); + if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) + goto out; #endif + pmd = pmd_offset(pud_offset(pgd, address), address); + printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); /* * We must not directly access the pte in the highpte @@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address) * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ - if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT) - && !(page & _PAGE_PSE)) { - - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; - printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); - } + if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) + goto out; + pte = pte_offset_kernel(pmd, address); + printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); +out: printk("\n"); } @@ -450,16 +445,12 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long address) { - pgd_t *pgd; + pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd_t *pgd = base + pgd_index(address); pud_t *pud; pmd_t *pmd; pte_t *pte; - pgd = (pgd_t *)read_cr3(); - - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); - - pgd += pgd_index(address); if (bad_address(pgd)) goto bad; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 2112ed55e7ea..63a6ba66cbe0 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -24,7 +24,7 @@ void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) @@ -104,6 +104,7 @@ EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_prot); +EXPORT_SYMBOL(kmap_atomic_to_page); void __init set_highmem_pages_init(void) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include <linux/module.h> #include <linux/highmem.h> -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8a450930834f..334e63ca7b2b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,77 +22,7 @@ #include <asm/pgalloc.h> #include <asm/pat.h> -static inline int phys_addr_valid(resource_size_t addr) -{ -#ifdef CONFIG_PHYS_ADDR_T_64BIT - return !(addr >> boot_cpu_data.x86_phys_bits); -#else - return 1; -#endif -} - -#ifdef CONFIG_X86_64 - -unsigned long __phys_addr(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); - x += phys_base; - } else { - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); - } - return x; -} -EXPORT_SYMBOL(__phys_addr); - -bool __virt_addr_valid(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - if (x >= KERNEL_IMAGE_SIZE) - return false; - x += phys_base; - } else { - if (x < PAGE_OFFSET) - return false; - x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) - return false; - } - - return pfn_valid(x >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#else - -#ifdef CONFIG_DEBUG_VIRTUAL -unsigned long __phys_addr(unsigned long x) -{ - /* VMALLOC_* aren't constants */ - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); - return x - PAGE_OFFSET; -} -EXPORT_SYMBOL(__phys_addr); -#endif - -bool __virt_addr_valid(unsigned long x) -{ - if (x < PAGE_OFFSET) - return false; - if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) - return false; - if (x >= FIXADDR_START) - return false; - return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#endif +#include "physaddr.h" int page_is_ram(unsigned long pagenr) { @@ -228,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, retval = reserve_memtype(phys_addr, (u64)phys_addr + size, prot_val, &new_prot_val); if (retval) { - pr_debug("Warning: reserve_memtype returned %d\n", retval); + printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); return NULL; } if (prot_val != new_prot_val) { - /* - * Do not fallback to certain memory types with certain - * requested type: - * - request is uc-, return cannot be write-back - * - request is uc-, return cannot be write-combine - * - request is write-combine, return cannot be write-back - */ - if ((prot_val == _PAGE_CACHE_UC_MINUS && - (new_prot_val == _PAGE_CACHE_WB || - new_prot_val == _PAGE_CACHE_WC)) || - (prot_val == _PAGE_CACHE_WC && - new_prot_val == _PAGE_CACHE_WB)) { - pr_debug( + if (!is_new_memtype_allowed(phys_addr, size, + prot_val, new_prot_val)) { + printk(KERN_ERR "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 165829600566..c8191defc38a 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -29,13 +29,26 @@ #include <linux/random.h> #include <linux/limits.h> #include <linux/sched.h> +#include <asm/elf.h> + +static unsigned int stack_maxrandom_size(void) +{ + unsigned int max = 0; + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) { + max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; + } + + return max; +} + /* * Top of mmap area (just below the process stack). * - * Leave an at least ~128 MB hole. + * Leave an at least ~128 MB hole with possible stack randomization. */ -#define MIN_GAP (128*1024*1024) +#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) #define MAX_GAP (TASK_SIZE/6*5) /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1962db..24952fdc7e40 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -12,6 +12,7 @@ #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/pfn.h> +#include <linux/percpu.h> #include <asm/e820.h> #include <asm/processor.h> @@ -686,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -744,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } @@ -822,6 +805,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, { struct cpa_data cpa; int ret, cache, checkalias; + unsigned long baddr = 0; /* * Check, if we are requested to change a not supported @@ -853,6 +837,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, */ WARN_ON_ONCE(1); } + /* + * Save address for cache flush. *addr is modified in the call + * to __change_page_attr_set_clr() below. + */ + baddr = *addr; } /* Must avoid aliasing mappings in the highmem code */ @@ -900,7 +889,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa_flush_array(addr, numpages, cache, cpa.flags, pages); } else - cpa_flush_range(*addr, numpages, cache); + cpa_flush_range(baddr, numpages, cache); } else cpa_flush_all(cache); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 352aa9e927e2..7257cf3decf9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -15,6 +15,7 @@ #include <linux/gfp.h> #include <linux/mm.h> #include <linux/fs.h> +#include <linux/rbtree.h> #include <asm/cacheflush.h> #include <asm/processor.h> @@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags) * areas). All the aliases have the same cache attributes of course. * Zero attributes are represented as holes. * - * Currently the data structure is a list because the number of mappings - * are expected to be relatively small. If this should be a problem - * it could be changed to a rbtree or similar. + * The data structure is a list that is also organized as an rbtree + * sorted on the start address of memtype range. * - * memtype_lock protects the whole list. + * memtype_lock protects both the linear list and rbtree. */ struct memtype { @@ -160,11 +160,53 @@ struct memtype { u64 end; unsigned long type; struct list_head nd; + struct rb_node rb; }; +static struct rb_root memtype_rbroot = RB_ROOT; static LIST_HEAD(memtype_list); static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ +static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) +{ + struct rb_node *node = root->rb_node; + struct memtype *last_lower = NULL; + + while (node) { + struct memtype *data = container_of(node, struct memtype, rb); + + if (data->start < start) { + last_lower = data; + node = node->rb_right; + } else if (data->start > start) { + node = node->rb_left; + } else + return data; + } + + /* Will return NULL if there is no entry with its start <= start */ + return last_lower; +} + +static void memtype_rb_insert(struct rb_root *root, struct memtype *data) +{ + struct rb_node **new = &(root->rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct memtype *this = container_of(*new, struct memtype, rb); + + parent = *new; + if (data->start <= this->start) + new = &((*new)->rb_left); + else if (data->start > this->start) + new = &((*new)->rb_right); + } + + rb_link_node(&data->rb, parent, new); + rb_insert_color(&data->rb, root); +} + /* * Does intersection of PAT memory type and MTRR memory type and returns * the resulting memory type as PAT understands it. @@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) return -EBUSY; } -static struct memtype *cached_entry; -static u64 cached_start; - static int pat_pagerange_is_ram(unsigned long start, unsigned long end) { int ram_page = 0, not_rampage = 0; @@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) } /* - * For RAM pages, mark the pages as non WB memory type using - * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or - * set_memory_wc() on a RAM page at a time before marking it as WB again. - * This is ok, because only one driver will be owning the page and - * doing set_memory_*() calls. + * For RAM pages, we use page flags to mark the pages with appropriate type. + * Here we do two pass: + * - Find the memtype of all the pages in the range, look for any conflicts + * - In case of no conflicts, set the new memtype for pages in the range * - * For now, we use PageNonWB to track that the RAM page is being mapped - * as non WB. In future, we will have to use one more flag - * (or some other mechanism in page_struct) to distinguish between - * UC and WC mapping. + * Caller must hold memtype_lock for atomicity. */ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, unsigned long *new_type) { struct page *page; - u64 pfn, end_pfn; + u64 pfn; + + if (req_type == _PAGE_CACHE_UC) { + /* We do not support strong UC */ + WARN_ON_ONCE(1); + req_type = _PAGE_CACHE_UC_MINUS; + } for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - if (page_mapped(page) || PageNonWB(page)) - goto out; + unsigned long type; - SetPageNonWB(page); + page = pfn_to_page(pfn); + type = get_page_memtype(page); + if (type != -1) { + printk(KERN_INFO "reserve_ram_pages_type failed " + "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", + start, end, type, req_type); + if (new_type) + *new_type = type; + + return -EBUSY; + } } - return 0; -out: - end_pfn = pfn; - for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { + if (new_type) + *new_type = req_type; + + for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - ClearPageNonWB(page); + set_page_memtype(page, req_type); } - - return -EINVAL; + return 0; } static int free_ram_pages_type(u64 start, u64 end) { struct page *page; - u64 pfn, end_pfn; + u64 pfn; for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - if (page_mapped(page) || !PageNonWB(page)) - goto out; - - ClearPageNonWB(page); + set_page_memtype(page, -1); } return 0; - -out: - end_pfn = pfn; - for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { - page = pfn_to_page(pfn); - SetPageNonWB(page); - } - return -EINVAL; } /* @@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) { if (req_type == -1) *new_type = _PAGE_CACHE_WB; + else if (req_type == _PAGE_CACHE_WC) + *new_type = _PAGE_CACHE_UC_MINUS; else *new_type = req_type & _PAGE_CACHE_MASK; } @@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, *new_type = actual_type; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) + if (is_range_ram == 1) { + + spin_lock(&memtype_lock); + err = reserve_ram_pages_type(start, end, req_type, new_type); + spin_unlock(&memtype_lock); + + return err; + } else if (is_range_ram < 0) { return -EINVAL; + } new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -380,17 +424,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); - if (cached_entry && start >= cached_start) - entry = cached_entry; - else - entry = list_entry(&memtype_list, struct memtype, nd); - /* Search for existing mapping that overlaps the current range */ where = NULL; - list_for_each_entry_continue(entry, &memtype_list, nd) { + list_for_each_entry(entry, &memtype_list, nd) { if (end <= entry->start) { where = entry->nd.prev; - cached_entry = list_entry(where, struct memtype, nd); break; } else if (start <= entry->start) { /* end > entry->start */ err = chk_conflict(new, entry, new_type); @@ -398,8 +436,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); where = entry->nd.prev; - cached_entry = list_entry(where, - struct memtype, nd); } break; } else if (start < entry->end) { /* start > entry->start */ @@ -407,8 +443,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (!err) { dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); - cached_entry = list_entry(entry->nd.prev, - struct memtype, nd); /* * Move to right position in the linked @@ -436,13 +470,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return err; } - cached_start = start; - if (where) list_add(&new->nd, where); else list_add_tail(&new->nd, &memtype_list); + memtype_rb_insert(&memtype_rbroot, new); + spin_unlock(&memtype_lock); dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", @@ -454,7 +488,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, int free_memtype(u64 start, u64 end) { - struct memtype *entry; + struct memtype *entry, *saved_entry; int err = -EINVAL; int is_range_ram; @@ -466,23 +500,58 @@ int free_memtype(u64 start, u64 end) return 0; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) + if (is_range_ram == 1) { + + spin_lock(&memtype_lock); + err = free_ram_pages_type(start, end); + spin_unlock(&memtype_lock); + + return err; + } else if (is_range_ram < 0) { return -EINVAL; + } spin_lock(&memtype_lock); - list_for_each_entry(entry, &memtype_list, nd) { + + entry = memtype_rb_search(&memtype_rbroot, start); + if (unlikely(entry == NULL)) + goto unlock_ret; + + /* + * Saved entry points to an entry with start same or less than what + * we searched for. Now go through the list in both directions to look + * for the entry that matches with both start and end, with list stored + * in sorted start address + */ + saved_entry = entry; + list_for_each_entry_from(entry, &memtype_list, nd) { if (entry->start == start && entry->end == end) { - if (cached_entry == entry || cached_start == start) - cached_entry = NULL; + rb_erase(&entry->rb, &memtype_rbroot); + list_del(&entry->nd); + kfree(entry); + err = 0; + break; + } else if (entry->start > start) { + break; + } + } + + if (!err) + goto unlock_ret; + entry = saved_entry; + list_for_each_entry_reverse(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { + rb_erase(&entry->rb, &memtype_rbroot); list_del(&entry->nd); kfree(entry); err = 0; break; + } else if (entry->start < start) { + break; } } +unlock_ret: spin_unlock(&memtype_lock); if (err) { @@ -496,6 +565,101 @@ int free_memtype(u64 start, u64 end) } +/** + * lookup_memtype - Looksup the memory type for a physical address + * @paddr: physical address of which memory type needs to be looked up + * + * Only to be called when PAT is enabled + * + * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or + * _PAGE_CACHE_UC + */ +static unsigned long lookup_memtype(u64 paddr) +{ + int rettype = _PAGE_CACHE_WB; + struct memtype *entry; + + if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) + return rettype; + + if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { + struct page *page; + spin_lock(&memtype_lock); + page = pfn_to_page(paddr >> PAGE_SHIFT); + rettype = get_page_memtype(page); + spin_unlock(&memtype_lock); + /* + * -1 from get_page_memtype() implies RAM page is in its + * default state and not reserved, and hence of type WB + */ + if (rettype == -1) + rettype = _PAGE_CACHE_WB; + + return rettype; + } + + spin_lock(&memtype_lock); + + entry = memtype_rb_search(&memtype_rbroot, paddr); + if (entry != NULL) + rettype = entry->type; + else + rettype = _PAGE_CACHE_UC_MINUS; + + spin_unlock(&memtype_lock); + return rettype; +} + +/** + * io_reserve_memtype - Request a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + * @type: A pointer to memtype, with requested type. On success, requested + * or any other compatible type that was available for the region is returned + * + * On success, returns 0 + * On failure, returns non-zero + */ +int io_reserve_memtype(resource_size_t start, resource_size_t end, + unsigned long *type) +{ + resource_size_t size = end - start; + unsigned long req_type = *type; + unsigned long new_type; + int ret; + + WARN_ON_ONCE(iomem_map_sanity_check(start, size)); + + ret = reserve_memtype(start, end, req_type, &new_type); + if (ret) + goto out_err; + + if (!is_new_memtype_allowed(start, size, req_type, new_type)) + goto out_free; + + if (kernel_map_sync_memtype(start, size, new_type) < 0) + goto out_free; + + *type = new_type; + return 0; + +out_free: + free_memtype(start, end); + ret = -EBUSY; +out_err: + return ret; +} + +/** + * io_free_memtype - Release a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + */ +void io_free_memtype(resource_size_t start, resource_size_t end) +{ + free_memtype(start, end); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -577,7 +741,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) { unsigned long id_sz; - if (!pat_enabled || base >= __pa(high_memory)) + if (base >= __pa(high_memory)) return 0; id_sz = (__pa(high_memory) < base + size) ? @@ -612,11 +776,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. Maintain the current - * behavior with RAM pages by returning success. + * reserve_pfn_range() for RAM pages. We do not refcount to keep + * track of number of mappings of RAM pages. We can assert that + * the type requested matches the type of first page in the range. */ - if (is_ram != 0) + if (is_ram) { + if (!pat_enabled) + return 0; + + flags = lookup_memtype(paddr); + if (want_flags != flags) { + printk(KERN_WARNING + "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + (unsigned long long)paddr, + (unsigned long long)(paddr + size), + cattr_name(flags)); + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + flags); + } return 0; + } ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) @@ -678,14 +860,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; - if (!pat_enabled) - return 0; - - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* * reserve the whole chunk covered by vma. We need the @@ -713,23 +887,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { + unsigned long flags; resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - if (!pat_enabled) - return 0; - - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; return reserve_pfn_range(paddr, vma_size, prot, 0); } + if (!pat_enabled) + return 0; + + /* for vm_insert_pfn and friends, we set prot based on lookup */ + flags = lookup_memtype(pfn << PAGE_SHIFT); + *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + flags); + return 0; } @@ -744,14 +919,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - if (!pat_enabled) - return; - - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* free the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; @@ -827,7 +994,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations memtype_seq_ops = { +static const struct seq_operations memtype_seq_ops = { .start = memtype_seq_start, .next = memtype_seq_next, .stop = memtype_seq_stop, diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c new file mode 100644 index 000000000000..d2e2735327b4 --- /dev/null +++ b/arch/x86/mm/physaddr.c @@ -0,0 +1,70 @@ +#include <linux/mmdebug.h> +#include <linux/module.h> +#include <linux/mm.h> + +#include <asm/page.h> + +#include "physaddr.h" + +#ifdef CONFIG_X86_64 + +unsigned long __phys_addr(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); + x += phys_base; + } else { + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; + VIRTUAL_BUG_ON(!phys_addr_valid(x)); + } + return x; +} +EXPORT_SYMBOL(__phys_addr); + +bool __virt_addr_valid(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + if (x >= KERNEL_IMAGE_SIZE) + return false; + x += phys_base; + } else { + if (x < PAGE_OFFSET) + return false; + x -= PAGE_OFFSET; + if (!phys_addr_valid(x)) + return false; + } + + return pfn_valid(x >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + +#else + +#ifdef CONFIG_DEBUG_VIRTUAL +unsigned long __phys_addr(unsigned long x) +{ + /* VMALLOC_* aren't constants */ + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + return x - PAGE_OFFSET; +} +EXPORT_SYMBOL(__phys_addr); +#endif + +bool __virt_addr_valid(unsigned long x) +{ + if (x < PAGE_OFFSET) + return false; + if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) + return false; + if (x >= FIXADDR_START) + return false; + return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/mm/physaddr.h b/arch/x86/mm/physaddr.h new file mode 100644 index 000000000000..a3cd5a0c97b3 --- /dev/null +++ b/arch/x86/mm/physaddr.h @@ -0,0 +1,10 @@ +#include <asm/processor.h> + +static inline int phys_addr_valid(resource_size_t addr) +{ +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif +} diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 29a0e37114f8..6f8aa33031c7 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -215,7 +215,7 @@ int __init get_memcfg_from_srat(void) goto out_fail; if (num_memory_chunks == 0) { - printk(KERN_WARNING + printk(KERN_DEBUG "could not find any ACPI SRAT memory areas.\n"); goto out_fail; } @@ -277,7 +277,7 @@ int __init get_memcfg_from_srat(void) } return 1; out_fail: - printk(KERN_ERR "failed to get NUMA memory information from SRAT" + printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" " table\n"); return 0; } |