diff options
Diffstat (limited to 'include')
38 files changed, 521 insertions, 238 deletions
diff --git a/include/asm-alpha/barrier.h b/include/asm-alpha/barrier.h index 229c83fe77cb..681ff581afa5 100644 --- a/include/asm-alpha/barrier.h +++ b/include/asm-alpha/barrier.h @@ -1,6 +1,8 @@ #ifndef __BARRIER_H #define __BARRIER_H +#include <asm/compiler.h> + #define mb() \ __asm__ __volatile__("mb": : :"memory") diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h index 8e058a67c9a4..fafdd4f7010a 100644 --- a/include/asm-alpha/rwsem.h +++ b/include/asm-alpha/rwsem.h @@ -262,5 +262,10 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) #endif } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h index 9bb325c54645..f49bfb78c221 100644 --- a/include/asm-arm/tlb.h +++ b/include/asm-arm/tlb.h @@ -27,11 +27,7 @@ */ struct mmu_gather { struct mm_struct *mm; - unsigned int freed; unsigned int fullmm; - - unsigned int flushes; - unsigned int avoided_flushes; }; DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -39,11 +35,9 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); static inline struct mmu_gather * tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) { - int cpu = smp_processor_id(); - struct mmu_gather *tlb = &per_cpu(mmu_gathers, cpu); + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); tlb->mm = mm; - tlb->freed = 0; tlb->fullmm = full_mm_flush; return tlb; @@ -52,24 +46,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - struct mm_struct *mm = tlb->mm; - unsigned long freed = tlb->freed; - int rss = get_mm_counter(mm, rss); - - if (rss < freed) - freed = rss; - add_mm_counter(mm, rss, -freed); - if (tlb->fullmm) - flush_tlb_mm(mm); + flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ check_pgt_cache(); -} -static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb) -{ - return tlb->fullmm; + put_cpu_var(mmu_gathers); } #define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0) diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h index 1316352a58f3..08ddd85b8d35 100644 --- a/include/asm-arm26/tlb.h +++ b/include/asm-arm26/tlb.h @@ -10,24 +10,20 @@ */ struct mmu_gather { struct mm_struct *mm; - unsigned int freed; - unsigned int fullmm; - - unsigned int flushes; - unsigned int avoided_flushes; + unsigned int need_flush; + unsigned int fullmm; }; -extern struct mmu_gather mmu_gathers[NR_CPUS]; +DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); static inline struct mmu_gather * tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) { - int cpu = smp_processor_id(); - struct mmu_gather *tlb = &mmu_gathers[cpu]; + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); tlb->mm = mm; - tlb->freed = 0; - tlb->fullmm = full_mm_flush; + tlb->need_flush = 0; + tlb->fullmm = full_mm_flush; return tlb; } @@ -35,30 +31,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - struct mm_struct *mm = tlb->mm; - unsigned long freed = tlb->freed; - int rss = get_mm_counter(mm, rss); - - if (rss < freed) - freed = rss; - add_mm_counter(mm, rss, -freed); - - if (freed) { - flush_tlb_mm(mm); - tlb->flushes++; - } else { - tlb->avoided_flushes++; - } + if (tlb->need_flush) + flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ check_pgt_cache(); -} - -static inline unsigned int -tlb_is_full_mm(struct mmu_gather *tlb) -{ - return tlb->fullmm; + put_cpu_var(mmu_gathers); } #define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0) @@ -71,7 +50,13 @@ tlb_is_full_mm(struct mmu_gather *tlb) } while (0) #define tlb_end_vma(tlb,vma) do { } while (0) -#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) +static inline void +tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + tlb->need_flush = 1; + free_page_and_swap_cache(page); +} + #define pte_free_tlb(tlb,ptep) pte_free(ptep) #define pmd_free_tlb(tlb,pmdp) pmd_free(pmdp) diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index c20ec257ecc0..68c6fea994d9 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -10,14 +10,9 @@ #define pud_t pgd_t -#define pmd_alloc(mm, pud, address) \ -({ pmd_t *ret; \ - if (pgd_none(*pud)) \ - ret = __pmd_alloc(mm, pud, address); \ - else \ - ret = pmd_offset(pud, address); \ - ret; \ -}) +#define pmd_alloc(mm, pud, address) \ + ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ + NULL: pmd_offset(pud, address)) #define pud_alloc(mm, pgd, address) (pgd) #define pud_offset(pgd, start) (pgd) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index ff28c8b31f58..7dca30a26c53 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -8,7 +8,7 @@ * - update the page tables * - inform the TLB about the new one * - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock. + * We hold the mm semaphore for reading, and the pte lock. * * Note: the old pte is known to not be writable, so we don't need to * worry about dirty bits etc getting lost. diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 7d0298347ee7..cdd4145243cd 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -35,16 +35,13 @@ #endif /* struct mmu_gather is an opaque type used by the mm code for passing around - * any data needed by arch specific code for tlb_remove_page. This structure - * can be per-CPU or per-MM as the page table lock is held for the duration of - * TLB shootdown. + * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; unsigned int nr; /* set to ~0U means fast mode */ unsigned int need_flush;/* Really unmapped some ptes? */ unsigned int fullmm; /* non-zero means full mm flush */ - unsigned long freed; struct page * pages[FREE_PTE_NR]; }; @@ -57,7 +54,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); static inline struct mmu_gather * tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) { - struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id()); + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); tlb->mm = mm; @@ -65,7 +62,6 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) tlb->nr = num_online_cpus() > 1 ? 0U : ~0U; tlb->fullmm = full_mm_flush; - tlb->freed = 0; return tlb; } @@ -85,28 +81,17 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) /* tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources - * that were required. The page table lock is still held at this point. + * that were required. */ static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - int freed = tlb->freed; - struct mm_struct *mm = tlb->mm; - int rss = get_mm_counter(mm, rss); - - if (rss < freed) - freed = rss; - add_mm_counter(mm, rss, -freed); tlb_flush_mmu(tlb, start, end); /* keep the page table cache within bounds */ check_pgt_cache(); -} -static inline unsigned int -tlb_is_full_mm(struct mmu_gather *tlb) -{ - return tlb->fullmm; + put_cpu_var(mmu_gathers); } /* tlb_remove_page diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 348fe3a4879d..620a90641ea8 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -88,12 +88,6 @@ static inline int pfn_to_nid(unsigned long pfn) __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) -#define local_mapnr(kvaddr) \ -({ \ - unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \ - (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \ -}) - /* XXX: FIXME -- wli */ #define kern_addr_valid(kaddr) (0) diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index d101ac414f07..0e3ec809352d 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -203,7 +203,8 @@ extern unsigned long pg0[]; #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) -#define pmd_none(x) (!pmd_val(x)) +/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ +#define pmd_none(x) (!(unsigned long)pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h index 7625a675852f..be4ab859238e 100644 --- a/include/asm-i386/rwsem.h +++ b/include/asm-i386/rwsem.h @@ -284,5 +284,10 @@ LOCK_PREFIX "xadd %0,(%2)" return tmp+delta; } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _I386_RWSEM_H */ diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h index e18b5ab0cb75..1327c91ea39c 100644 --- a/include/asm-ia64/rwsem.h +++ b/include/asm-ia64/rwsem.h @@ -186,4 +186,9 @@ __downgrade_write (struct rw_semaphore *sem) #define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) #define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* _ASM_IA64_RWSEM_H */ diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h index 3a9a6d1be75c..834370b9dea1 100644 --- a/include/asm-ia64/tlb.h +++ b/include/asm-ia64/tlb.h @@ -60,7 +60,6 @@ struct mmu_gather { unsigned int nr; /* == ~0U => fast mode */ unsigned char fullmm; /* non-zero means full mm flush */ unsigned char need_flush; /* really unmapped some PTEs? */ - unsigned long freed; /* number of pages freed */ unsigned long start_addr; unsigned long end_addr; struct page *pages[FREE_PTE_NR]; @@ -129,7 +128,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e static inline struct mmu_gather * tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush) { - struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers); + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); tlb->mm = mm; /* @@ -147,25 +146,17 @@ tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush) */ tlb->nr = (num_online_cpus() == 1) ? ~0U : 0; tlb->fullmm = full_mm_flush; - tlb->freed = 0; tlb->start_addr = ~0UL; return tlb; } /* * Called at the end of the shootdown operation to free up any resources that were - * collected. The page table lock is still held at this point. + * collected. */ static inline void tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) { - unsigned long freed = tlb->freed; - struct mm_struct *mm = tlb->mm; - unsigned long rss = get_mm_counter(mm, rss); - - if (rss < freed) - freed = rss; - add_mm_counter(mm, rss, -freed); /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. @@ -174,12 +165,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) /* keep the page table cache within bounds */ check_pgt_cache(); -} -static inline unsigned int -tlb_is_full_mm(struct mmu_gather *tlb) -{ - return tlb->fullmm; + put_cpu_var(mmu_gathers); } /* diff --git a/include/asm-m32r/mmzone.h b/include/asm-m32r/mmzone.h index d58878ec899e..adc7970a77ec 100644 --- a/include/asm-m32r/mmzone.h +++ b/include/asm-m32r/mmzone.h @@ -21,12 +21,6 @@ extern struct pglist_data *node_data[]; __pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1; \ }) -#define local_mapnr(kvaddr) \ -({ \ - unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \ - (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \ -}) - #define pfn_to_page(pfn) \ ({ \ unsigned long __pfn = pfn; \ diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h index aa592d8c0e39..1bc3c83ee74b 100644 --- a/include/asm-parisc/cacheflush.h +++ b/include/asm-parisc/cacheflush.h @@ -100,30 +100,34 @@ static inline void flush_cache_range(struct vm_area_struct *vma, /* Simple function to work out if we have an existing address translation * for a user space vma. */ -static inline pte_t *__translation_exists(struct mm_struct *mm, - unsigned long addr) +static inline int translation_exists(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) { - pgd_t *pgd = pgd_offset(mm, addr); + pgd_t *pgd = pgd_offset(vma->vm_mm, addr); pmd_t *pmd; - pte_t *pte; + pte_t pte; if(pgd_none(*pgd)) - return NULL; + return 0; pmd = pmd_offset(pgd, addr); if(pmd_none(*pmd) || pmd_bad(*pmd)) - return NULL; + return 0; - pte = pte_offset_map(pmd, addr); + /* We cannot take the pte lock here: flush_cache_page is usually + * called with pte lock already held. Whereas flush_dcache_page + * takes flush_dcache_mmap_lock, which is lower in the hierarchy: + * the vma itself is secure, but the pte might come or go racily. + */ + pte = *pte_offset_map(pmd, addr); + /* But pte_unmap() does nothing on this architecture */ - /* The PA flush mappings show up as pte_none, but they're - * valid none the less */ - if(pte_none(*pte) && ((pte_val(*pte) & _PAGE_FLUSH) == 0)) - return NULL; - return pte; -} -#define translation_exists(vma, addr) __translation_exists((vma)->vm_mm, addr) + /* Filter out coincidental file entries and swap entries */ + if (!(pte_val(pte) & (_PAGE_FLUSH|_PAGE_PRESENT))) + return 0; + return pte_pfn(pte) == pfn; +} /* Private function to flush a page from the cache of a non-current * process. cr25 contains the Page Directory of the current user @@ -175,9 +179,8 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long { BUG_ON(!vma->vm_mm->context); - if(likely(translation_exists(vma, vmaddr))) + if (likely(translation_exists(vma, vmaddr, pfn))) __flush_cache_page(vma, vmaddr); } #endif - diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h index 595d3dce120a..ae039f4fd711 100644 --- a/include/asm-parisc/mmzone.h +++ b/include/asm-parisc/mmzone.h @@ -27,12 +27,6 @@ extern struct node_map_data node_data[]; }) #define node_localnr(pfn, nid) ((pfn) - node_start_pfn(nid)) -#define local_mapnr(kvaddr) \ -({ \ - unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \ - (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \ -}) - #define pfn_to_page(pfn) \ ({ \ unsigned long __pfn = (pfn); \ diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h index 84af4ab1fe51..e97aa8d1eff5 100644 --- a/include/asm-parisc/tlbflush.h +++ b/include/asm-parisc/tlbflush.h @@ -88,7 +88,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ flush_tlb_all(); else { - + preempt_disable(); mtsp(vma->vm_mm->context,1); purge_tlb_start(); if (split_tlb) { @@ -102,6 +102,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, pdtlb(start); start += PAGE_SIZE; } + preempt_enable(); } purge_tlb_end(); } diff --git a/include/asm-ppc/rwsem.h b/include/asm-ppc/rwsem.h index 3e738f483c11..3501ea72f88c 100644 --- a/include/asm-ppc/rwsem.h +++ b/include/asm-ppc/rwsem.h @@ -168,5 +168,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _PPC_RWSEM_XADD_H */ diff --git a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h index ed473f4b0152..80a708e7093a 100644 --- a/include/asm-ppc64/mmzone.h +++ b/include/asm-ppc64/mmzone.h @@ -67,9 +67,6 @@ static inline int pa_to_nid(unsigned long pa) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) -#define local_mapnr(kvaddr) \ - ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) - #ifdef CONFIG_DISCONTIGMEM /* diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index c83679c9d2b0..2eb1778a3a15 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -478,10 +478,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) + printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) diff --git a/include/asm-ppc64/rwsem.h b/include/asm-ppc64/rwsem.h index bd5c2f093575..7a647fae3765 100644 --- a/include/asm-ppc64/rwsem.h +++ b/include/asm-ppc64/rwsem.h @@ -163,5 +163,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _PPC_RWSEM_XADD_H */ diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h index 8c0cebbfc034..0422a085dd56 100644 --- a/include/asm-s390/rwsem.h +++ b/include/asm-s390/rwsem.h @@ -351,5 +351,10 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return new; } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _S390_RWSEM_H */ diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h index 1be4337f5259..0262d3d1e5e0 100644 --- a/include/asm-sh/rwsem.h +++ b/include/asm-sh/rwsem.h @@ -166,5 +166,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _ASM_SH_RWSEM_H */ diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h index 4568ee4022df..cef5e8270421 100644 --- a/include/asm-sparc64/rwsem.h +++ b/include/asm-sparc64/rwsem.h @@ -56,6 +56,11 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) atomic_add(delta, (atomic_t *)(&sem->count)); } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _SPARC64_RWSEM_H */ diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h index 9baf57db01d2..66138d959df5 100644 --- a/include/asm-sparc64/tlb.h +++ b/include/asm-sparc64/tlb.h @@ -25,9 +25,8 @@ struct mmu_gather { struct mm_struct *mm; unsigned int pages_nr; unsigned int need_flush; - unsigned int tlb_frozen; + unsigned int fullmm; unsigned int tlb_nr; - unsigned long freed; unsigned long vaddrs[TLB_BATCH_NR]; struct page *pages[FREE_PTE_NR]; }; @@ -44,14 +43,13 @@ extern void flush_tlb_pending(void); static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) { - struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); + struct mmu_gather *mp = &get_cpu_var(mmu_gathers); BUG_ON(mp->tlb_nr); mp->mm = mm; mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U; - mp->tlb_frozen = full_mm_flush; - mp->freed = 0; + mp->fullmm = full_mm_flush; return mp; } @@ -78,30 +76,19 @@ extern void smp_flush_tlb_mm(struct mm_struct *mm); static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end) { - unsigned long freed = mp->freed; - struct mm_struct *mm = mp->mm; - unsigned long rss = get_mm_counter(mm, rss); - - if (rss < freed) - freed = rss; - add_mm_counter(mm, rss, -freed); - tlb_flush_mmu(mp); - if (mp->tlb_frozen) { - if (CTX_VALID(mm->context)) - do_flush_tlb_mm(mm); - mp->tlb_frozen = 0; + if (mp->fullmm) { + if (CTX_VALID(mp->mm->context)) + do_flush_tlb_mm(mp->mm); + mp->fullmm = 0; } else flush_tlb_pending(); /* keep the page table cache within bounds */ check_pgt_cache(); -} -static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp) -{ - return mp->tlb_frozen; + put_cpu_var(mmu_gathers); } static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page) diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index 616d02b57ea9..ac64eb955868 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -138,7 +138,7 @@ extern unsigned long pg0[1024]; #define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) -#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h index c002175b6e82..46077e9c1910 100644 --- a/include/asm-x86_64/rwsem.h +++ b/include/asm-x86_64/rwsem.h @@ -274,5 +274,10 @@ LOCK_PREFIX "xaddl %0,(%2)" return tmp+delta; } +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + #endif /* __KERNEL__ */ #endif /* _X8664_RWSEM_H */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 88af42f5e04a..c937d6e65502 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ ({ \ - BUG_ON(!PagePrivate(page)); \ - ((struct buffer_head *)(page)->private); \ + BUG_ON(!PagePrivate(page)); \ + ((struct buffer_head *)page_private(page)); \ }) #define page_has_buffers(page) PagePrivate(page) @@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page, { page_cache_get(page); SetPagePrivate(page); - page->private = (unsigned long)head; + set_page_private(page, (unsigned long)head); } static inline void get_bh(struct buffer_head *bh) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d664330d900e..0cea162b08c0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); -void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); int hugetlb_report_meminfo(char *); @@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) -#define zap_hugepage_range(vma, start, len) BUG() #define unmap_hugepage_range(vma, start, end) BUG() #define is_hugepage_mem_enough(size) 0 #define hugetlb_report_meminfo(buf) 0 diff --git a/include/linux/memory.h b/include/linux/memory.h new file mode 100644 index 000000000000..0def328ab5cf --- /dev/null +++ b/include/linux/memory.h @@ -0,0 +1,94 @@ +/* + * include/linux/memory.h - generic memory definition + * + * This is mainly for topological representation. We define the + * basic "struct memory_block" here, which can be embedded in per-arch + * definitions or NUMA information. + * + * Basic handling of the devices is done in drivers/base/memory.c + * and system devices are handled in drivers/base/sys.c. + * + * Memory block are exported via sysfs in the class/memory/devices/ + * directory. + * + */ +#ifndef _LINUX_MEMORY_H_ +#define _LINUX_MEMORY_H_ + +#include <linux/sysdev.h> +#include <linux/node.h> +#include <linux/compiler.h> + +#include <asm/semaphore.h> + +struct memory_block { + unsigned long phys_index; + unsigned long state; + /* + * This serializes all state change requests. It isn't + * held during creation because the control files are + * created long after the critical areas during + * initialization. + */ + struct semaphore state_sem; + int phys_device; /* to which fru does this belong? */ + void *hw; /* optional pointer to fw/hw data */ + int (*phys_callback)(struct memory_block *); + struct sys_device sysdev; +}; + +/* These states are exposed to userspace as text strings in sysfs */ +#define MEM_ONLINE (1<<0) /* exposed to userspace */ +#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ +#define MEM_OFFLINE (1<<2) /* exposed to userspace */ + +/* + * All of these states are currently kernel-internal for notifying + * kernel components and architectures. + * + * For MEM_MAPPING_INVALID, all notifier chains with priority >0 + * are called before pfn_to_page() becomes invalid. The priority=0 + * entry is reserved for the function that actually makes + * pfn_to_page() stop working. Any notifiers that want to be called + * after that should have priority <0. + */ +#define MEM_MAPPING_INVALID (1<<3) + +#ifndef CONFIG_MEMORY_HOTPLUG +static inline int memory_dev_init(void) +{ + return 0; +} +static inline int register_memory_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline void unregister_memory_notifier(struct notifier_block *nb) +{ +} +#else +extern int register_memory(struct memory_block *, struct mem_section *section, struct node *); +extern int register_new_memory(struct mem_section *); +extern int unregister_memory_section(struct mem_section *); +extern int memory_dev_init(void); +extern int register_memory_notifier(struct notifier_block *nb); +extern void unregister_memory_notifier(struct notifier_block *nb); + +#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) + +extern int invalidate_phys_mapping(unsigned long, unsigned long); +struct notifier_block; + +extern int register_memory_notifier(struct notifier_block *nb); +extern void unregister_memory_notifier(struct notifier_block *nb); + +extern struct sysdev_class memory_sysdev_class; +#endif /* CONFIG_MEMORY_HOTPLUG */ + +#define hotplug_memory_notifier(fn, pri) { \ + static struct notifier_block fn##_mem_nb = \ + { .notifier_call = fn, .priority = pri }; \ + register_memory_notifier(&fn##_mem_nb); \ +} + +#endif /* _LINUX_MEMORY_H_ */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h new file mode 100644 index 000000000000..01f03bc06eff --- /dev/null +++ b/include/linux/memory_hotplug.h @@ -0,0 +1,104 @@ +#ifndef __LINUX_MEMORY_HOTPLUG_H +#define __LINUX_MEMORY_HOTPLUG_H + +#include <linux/mmzone.h> +#include <linux/spinlock.h> +#include <linux/mmzone.h> +#include <linux/notifier.h> + +#ifdef CONFIG_MEMORY_HOTPLUG +/* + * pgdat resizing functions + */ +static inline +void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) +{ + spin_lock_irqsave(&pgdat->node_size_lock, *flags); +} +static inline +void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) +{ + spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); +} +static inline +void pgdat_resize_init(struct pglist_data *pgdat) +{ + spin_lock_init(&pgdat->node_size_lock); +} +/* + * Zone resizing functions + */ +static inline unsigned zone_span_seqbegin(struct zone *zone) +{ + return read_seqbegin(&zone->span_seqlock); +} +static inline int zone_span_seqretry(struct zone *zone, unsigned iv) +{ + return read_seqretry(&zone->span_seqlock, iv); +} +static inline void zone_span_writelock(struct zone *zone) +{ + write_seqlock(&zone->span_seqlock); +} +static inline void zone_span_writeunlock(struct zone *zone) +{ + write_sequnlock(&zone->span_seqlock); +} +static inline void zone_seqlock_init(struct zone *zone) +{ + seqlock_init(&zone->span_seqlock); +} +extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); +extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); +extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); +/* need some defines for these for archs that don't support it */ +extern void online_page(struct page *page); +/* VM interface that may be used by firmware interface */ +extern int add_memory(u64 start, u64 size); +extern int remove_memory(u64 start, u64 size); +extern int online_pages(unsigned long, unsigned long); + +/* reasonably generic interface to expand the physical pages in a zone */ +extern int __add_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages); +#else /* ! CONFIG_MEMORY_HOTPLUG */ +/* + * Stub functions for when hotplug is off + */ +static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} +static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} +static inline void pgdat_resize_init(struct pglist_data *pgdat) {} + +static inline unsigned zone_span_seqbegin(struct zone *zone) +{ + return 0; +} +static inline int zone_span_seqretry(struct zone *zone, unsigned iv) +{ + return 0; +} +static inline void zone_span_writelock(struct zone *zone) {} +static inline void zone_span_writeunlock(struct zone *zone) {} +static inline void zone_seqlock_init(struct zone *zone) {} + +static inline int mhp_notimplemented(const char *func) +{ + printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func); + dump_stack(); + return -ENOSYS; +} + +static inline int __add_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages) +{ + return mhp_notimplemented(__FUNCTION__); +} +#endif /* ! CONFIG_MEMORY_HOTPLUG */ +static inline int __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages) +{ + printk(KERN_WARNING "%s() called, not yet supported\n", __FUNCTION__); + dump_stack(); + return -ENOSYS; +} +#endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 58385ee1c0ac..7af8cb836e78 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -27,10 +27,10 @@ #include <linux/config.h> #include <linux/mmzone.h> -#include <linux/bitmap.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> +#include <linux/nodemask.h> struct vm_area_struct; @@ -47,8 +47,7 @@ struct vm_area_struct; * Locking policy for interlave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on - * mmap_sem. For allocating in the interleave policy the page_table_lock - * must be also aquired to protect il_next. + * mmap_sem. * * Freeing policy: * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd. @@ -63,7 +62,7 @@ struct mempolicy { union { struct zonelist *zonelist; /* bind */ short preferred_node; /* preferred */ - DECLARE_BITMAP(nodes, MAX_NUMNODES); /* interleave */ + nodemask_t nodes; /* interleave */ /* undefined for default */ } v; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index e1649578fb0c..5c1fb0a2e806 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ +#define VM_RESERVED 0x00080000 /* Pages managed in a special way */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ @@ -226,13 +226,18 @@ struct page { * to show when page is mapped * & limit reverse map searches. */ - unsigned long private; /* Mapping-private opaque data: + union { + unsigned long private; /* Mapping-private opaque data: * usually used for buffer_heads * if PagePrivate set; used for * swp_entry_t if PageSwapCache * When page is free, this indicates * order in the buddy system. */ +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS + spinlock_t ptl; +#endif + } u; struct address_space *mapping; /* If low bit clear, points to * inode address_space, or NULL. * If page mapped as anonymous @@ -260,6 +265,9 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; +#define page_private(page) ((page)->u.private) +#define set_page_private(page, v) ((page)->u.private = (v)) + /* * FIXME: take this include out, include page-flags.h in * files which need it (119 of them) @@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *)); #ifdef CONFIG_HUGETLB_PAGE -static inline int page_count(struct page *p) +static inline int page_count(struct page *page) { - if (PageCompound(p)) - p = (struct page *)p->private; - return atomic_read(&(p)->_count) + 1; + if (PageCompound(page)) + page = (struct page *)page_private(page); + return atomic_read(&page->_count) + 1; } static inline void get_page(struct page *page) { if (unlikely(PageCompound(page))) - page = (struct page *)page->private; + page = (struct page *)page_private(page); atomic_inc(&page->_count); } @@ -338,7 +346,7 @@ static inline void get_page(struct page *page) static inline void put_page(struct page *page) { - if (!PageReserved(page) && put_page_testzero(page)) + if (put_page_testzero(page)) __page_cache_release(page); } @@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page) static inline pgoff_t page_index(struct page *page) { if (unlikely(PageSwapCache(page))) - return page->private; + return page_private(page); return page->index; } @@ -682,7 +690,7 @@ struct zap_details { unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); -unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm, +unsigned long unmap_vmas(struct mmu_gather **tlb, struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *); @@ -704,10 +712,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, } extern int vmtruncate(struct inode * inode, loff_t offset); -extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); -extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); @@ -723,6 +727,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); +void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); int __set_page_dirty_buffers(struct page *page); int __set_page_dirty_nobuffers(struct page *page); @@ -759,38 +764,83 @@ struct shrinker; extern struct shrinker *set_shrinker(int, shrinker_t); extern void remove_shrinker(struct shrinker *shrinker); -/* - * On a two-level or three-level page table, this ends up being trivial. Thus - * the inlining and the symmetry break with pte_alloc_map() that does all - * of this out-of-line. - */ +int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); +int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); + /* * The following ifdef needed to get the 4level-fixup.h header to work. * Remove it when 4level-fixup.h has been removed. */ -#ifdef CONFIG_MMU -#ifndef __ARCH_HAS_4LEVEL_HACK +#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { - if (pgd_none(*pgd)) - return __pud_alloc(mm, pgd, address); - return pud_offset(pgd, address); + return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? + NULL: pud_offset(pgd, address); } static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { - if (pud_none(*pud)) - return __pmd_alloc(mm, pud, address); - return pmd_offset(pud, address); + return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? + NULL: pmd_offset(pud, address); } -#endif -#endif /* CONFIG_MMU */ +#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ + +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +/* + * We tuck a spinlock to guard each pagetable page into its struct page, + * at page->private, with BUILD_BUG_ON to make sure that this will not + * overflow into the next struct page (as it might with DEBUG_SPINLOCK). + * When freeing, reset page->mapping so free_pages_check won't complain. + */ +#define __pte_lockptr(page) &((page)->u.ptl) +#define pte_lock_init(_page) do { \ + spin_lock_init(__pte_lockptr(_page)); \ +} while (0) +#define pte_lock_deinit(page) ((page)->mapping = NULL) +#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) +#else +/* + * We use mm->page_table_lock to guard all pagetable pages of the mm. + */ +#define pte_lock_init(page) do {} while (0) +#define pte_lock_deinit(page) do {} while (0) +#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ + +#define pte_offset_map_lock(mm, pmd, address, ptlp) \ +({ \ + spinlock_t *__ptl = pte_lockptr(mm, pmd); \ + pte_t *__pte = pte_offset_map(pmd, address); \ + *(ptlp) = __ptl; \ + spin_lock(__ptl); \ + __pte; \ +}) + +#define pte_unmap_unlock(pte, ptl) do { \ + spin_unlock(ptl); \ + pte_unmap(pte); \ +} while (0) + +#define pte_alloc_map(mm, pmd, address) \ + ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ + NULL: pte_offset_map(pmd, address)) + +#define pte_alloc_map_lock(mm, pmd, address, ptlp) \ + ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ + NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) + +#define pte_alloc_kernel(pmd, address) \ + ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ + NULL: pte_offset_kernel(pmd, address)) extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, pg_data_t *pgdat, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); +extern void setup_per_zone_pages_min(void); extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); @@ -834,6 +884,7 @@ extern int split_vma(struct mm_struct *, extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); +extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); @@ -894,7 +945,8 @@ void handle_ra_miss(struct address_space *mapping, unsigned long max_sane_readahead(unsigned long nr); /* Do stack extension */ -extern int expand_stack(struct vm_area_struct * vma, unsigned long address); +extern int expand_stack(struct vm_area_struct *vma, unsigned long address); +extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); @@ -917,40 +969,28 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } -extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); +struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); +struct page *vmalloc_to_page(void *addr); +unsigned long vmalloc_to_pfn(void *addr); +int remap_pfn_range(struct vm_area_struct *, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t); -extern struct page * vmalloc_to_page(void *addr); -extern unsigned long vmalloc_to_pfn(void *addr); -extern struct page * follow_page(struct mm_struct *mm, unsigned long address, - int write); -extern int check_user_page_readable(struct mm_struct *mm, unsigned long address); -int remap_pfn_range(struct vm_area_struct *, unsigned long, - unsigned long, unsigned long, pgprot_t); +struct page *follow_page(struct mm_struct *, unsigned long address, + unsigned int foll_flags); +#define FOLL_WRITE 0x01 /* check pte is writable */ +#define FOLL_TOUCH 0x02 /* mark page accessed */ +#define FOLL_GET 0x04 /* do get_page on page */ +#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ #ifdef CONFIG_PROC_FS -void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); +void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); #else -static inline void __vm_stat_account(struct mm_struct *mm, +static inline void vm_stat_account(struct mm_struct *mm, unsigned long flags, struct file *file, long pages) { } #endif /* CONFIG_PROC_FS */ -static inline void vm_stat_account(struct vm_area_struct *vma) -{ - __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, - vma_pages(vma)); -} - -static inline void vm_stat_unaccount(struct vm_area_struct *vma) -{ - __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, - -vma_pages(vma)); -} - -/* update per process rss and vm hiwater data */ -extern void update_mem_hiwater(struct task_struct *tsk); - #ifndef CONFIG_DEBUG_PAGEALLOC static inline void kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7519eb4191e7..f5fa3082fd6a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -12,6 +12,7 @@ #include <linux/threads.h> #include <linux/numa.h> #include <linux/init.h> +#include <linux/seqlock.h> #include <asm/atomic.h> /* Free memory management - zoned buddy allocator. */ @@ -137,6 +138,10 @@ struct zone { * free areas of different sizes */ spinlock_t lock; +#ifdef CONFIG_MEMORY_HOTPLUG + /* see spanned/present_pages for more description */ + seqlock_t span_seqlock; +#endif struct free_area free_area[MAX_ORDER]; @@ -220,6 +225,16 @@ struct zone { /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; + /* + * zone_start_pfn, spanned_pages and present_pages are all + * protected by span_seqlock. It is a seqlock because it has + * to be read outside of zone->lock, and it is done in the main + * allocator path. But, it is written quite infrequently. + * + * The lock is declared along with zone->lock because it is + * frequently read in proximity to zone->lock. It's good to + * give them a chance of being in the same cacheline. + */ unsigned long spanned_pages; /* total size, including holes */ unsigned long present_pages; /* amount of memory (excluding holes) */ @@ -273,6 +288,16 @@ typedef struct pglist_data { struct page *node_mem_map; #endif struct bootmem_data *bdata; +#ifdef CONFIG_MEMORY_HOTPLUG + /* + * Must be held any time you expect node_start_pfn, node_present_pages + * or node_spanned_pages stay constant. Holding this will also + * guarantee that any pfn_valid() stays that way. + * + * Nests above zone->lock and zone->size_seqlock. + */ + spinlock_t node_size_lock; +#endif unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ unsigned long node_spanned_pages; /* total size of physical page @@ -293,6 +318,8 @@ typedef struct pglist_data { #endif #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) +#include <linux/memory_hotplug.h> + extern struct pglist_data *pgdat_list; void __get_zone_counts(unsigned long *active, unsigned long *inactive, @@ -509,6 +536,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return NULL; return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } +extern int __section_nr(struct mem_section* ms); /* * We use the lower bits of the mem_map pointer to store diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e80fb7ee6efd..35b30e6c8cf8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -95,8 +95,8 @@ int try_to_unmap(struct page *); /* * Called from mm/filemap_xip.c to unmap empty zero page */ -pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long); - +pte_t *page_check_address(struct page *, struct mm_struct *, + unsigned long, spinlock_t **); /* * Used by swapoff to help locate where page is expected in vma. diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index b52a2af25f1f..f30f805080ae 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -61,5 +61,10 @@ extern void FASTCALL(__up_read(struct rw_semaphore *sem)); extern void FASTCALL(__up_write(struct rw_semaphore *sem)); extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem)); +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->activity != 0); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_RWSEM_SPINLOCK_H */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 7f717e95ae37..66ff545552f7 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -1,14 +1,23 @@ #ifndef _LINUX_SCATTERLIST_H #define _LINUX_SCATTERLIST_H -static inline void sg_init_one(struct scatterlist *sg, - u8 *buf, unsigned int buflen) -{ - memset(sg, 0, sizeof(*sg)); +#include <asm/scatterlist.h> +#include <linux/mm.h> +#include <linux/string.h> +static inline void sg_set_buf(struct scatterlist *sg, void *buf, + unsigned int buflen) +{ sg->page = virt_to_page(buf); sg->offset = offset_in_page(buf); sg->length = buflen; } +static inline void sg_init_one(struct scatterlist *sg, void *buf, + unsigned int buflen) +{ + memset(sg, 0, sizeof(*sg)); + sg_set_buf(sg, buf, buflen); +} + #endif /* _LINUX_SCATTERLIST_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 27519df0f987..1c30bc308ef1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -249,6 +249,36 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +/* + * The mm counters are not protected by its page_table_lock, + * so must be incremented atomically. + */ +#ifdef ATOMIC64_INIT +#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member)) +#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member) +#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member) +#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member) +typedef atomic64_t mm_counter_t; +#else /* !ATOMIC64_INIT */ +/* + * The counters wrap back to 0 at 2^32 * PAGE_SIZE, + * that is, at 16TB if using 4kB page size. + */ +#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member)) +#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member) +#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member) +#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member) +typedef atomic_t mm_counter_t; +#endif /* !ATOMIC64_INIT */ + +#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +/* + * The mm counters are protected by its page_table_lock, + * so can be incremented directly. + */ #define set_mm_counter(mm, member, value) (mm)->_##member = (value) #define get_mm_counter(mm, member) ((mm)->_##member) #define add_mm_counter(mm, member, value) (mm)->_##member += (value) @@ -256,6 +286,20 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); #define dec_mm_counter(mm, member) (mm)->_##member-- typedef unsigned long mm_counter_t; +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ + +#define get_mm_rss(mm) \ + (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) +#define update_hiwater_rss(mm) do { \ + unsigned long _rss = get_mm_rss(mm); \ + if ((mm)->hiwater_rss < _rss) \ + (mm)->hiwater_rss = _rss; \ +} while (0) +#define update_hiwater_vm(mm) do { \ + if ((mm)->hiwater_vm < (mm)->total_vm) \ + (mm)->hiwater_vm = (mm)->total_vm; \ +} while (0) + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -279,15 +323,20 @@ struct mm_struct { * by mmlist_lock */ + /* Special counters, in some configurations protected by the + * page_table_lock, in other configurations by being atomic. + */ + mm_counter_t _file_rss; + mm_counter_t _anon_rss; + + unsigned long hiwater_rss; /* High-watermark of RSS usage */ + unsigned long hiwater_vm; /* High-water virtual memory usage */ + + unsigned long total_vm, locked_vm, shared_vm, exec_vm; + unsigned long stack_vm, reserved_vm, def_flags, nr_ptes; unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; - unsigned long total_vm, locked_vm, shared_vm; - unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes; - - /* Special counters protected by the page_table_lock */ - mm_counter_t _rss; - mm_counter_t _anon_rss; unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ @@ -308,11 +357,7 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; - struct kioctx default_kioctx; - - unsigned long hiwater_rss; /* High-water RSS usage */ - unsigned long hiwater_vm; /* High-water virtual memory usage */ }; struct sighand_struct { diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3701a0673d2c..1d5577b2b752 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -32,10 +32,14 @@ struct vm_struct { * Highlevel APIs for driver use */ extern void *vmalloc(unsigned long size); +extern void *vmalloc_node(unsigned long size, int node); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); -extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); +extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, + pgprot_t prot); +extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, + pgprot_t prot, int node); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, @@ -48,6 +52,8 @@ extern void vunmap(void *addr); extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); +extern struct vm_struct *get_vm_area_node(unsigned long size, + unsigned long flags, int node); extern struct vm_struct *remove_vm_area(void *addr); extern struct vm_struct *__remove_vm_area(void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, |