diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 19:42:40 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 19:42:40 -0800 |
commit | 7c225c69f86c934e3be9be63ecde754e286838d7 (patch) | |
tree | ff2df419b0c4886b37407235f7d21215e4cf45e4 /mm/huge_memory.c | |
parent | 6363b3f3ac5be096d08c8c504128befa0c033529 (diff) | |
parent | 1b7176aea0a924ac59c6a283129d3e8eb00aa915 (diff) | |
download | linux-7c225c69f86c934e3be9be63ecde754e286838d7.tar.bz2 |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- a few misc bits
- ocfs2 updates
- almost all of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (131 commits)
memory hotplug: fix comments when adding section
mm: make alloc_node_mem_map a void call if we don't have CONFIG_FLAT_NODE_MEM_MAP
mm: simplify nodemask printing
mm,oom_reaper: remove pointless kthread_run() error check
mm/page_ext.c: check if page_ext is not prepared
writeback: remove unused function parameter
mm: do not rely on preempt_count in print_vma_addr
mm, sparse: do not swamp log with huge vmemmap allocation failures
mm/hmm: remove redundant variable align_end
mm/list_lru.c: mark expected switch fall-through
mm/shmem.c: mark expected switch fall-through
mm/page_alloc.c: broken deferred calculation
mm: don't warn about allocations which stall for too long
fs: fuse: account fuse_inode slab memory as reclaimable
mm, page_alloc: fix potential false positive in __zone_watermark_ok
mm: mlock: remove lru_add_drain_all()
mm, sysctl: make NUMA stats configurable
shmem: convert shmem_init_inodecache() to void
Unify migrate_pages and move_pages access checks
mm, pagevec: rename pagevec drained field
...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 78 |
1 files changed, 65 insertions, 13 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 003f7bcd0952..86fe697e8bfb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -606,7 +606,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); - atomic_long_inc(&vma->vm_mm->nr_ptes); + mm_inc_nr_ptes(vma->vm_mm); spin_unlock(vmf->ptl); count_vm_event(THP_FAULT_ALLOC); } @@ -662,7 +662,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, if (pgtable) pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); - atomic_long_inc(&mm->nr_ptes); + mm_inc_nr_ptes(mm); return true; } @@ -747,7 +747,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pgtable) { pgtable_trans_huge_deposit(mm, pmd, pgtable); - atomic_long_inc(&mm->nr_ptes); + mm_inc_nr_ptes(mm); } set_pmd_at(mm, addr, pmd, entry); @@ -942,7 +942,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, set_pmd_at(src_mm, addr, src_pmd, pmd); } add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); - atomic_long_inc(&dst_mm->nr_ptes); + mm_inc_nr_ptes(dst_mm); pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); set_pmd_at(dst_mm, addr, dst_pmd, pmd); ret = 0; @@ -978,7 +978,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, get_page(src_page); page_dup_rmap(src_page, true); add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); - atomic_long_inc(&dst_mm->nr_ptes); + mm_inc_nr_ptes(dst_mm); pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); pmdp_set_wrprotect(src_mm, addr, src_pmd); @@ -1189,8 +1189,15 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd, goto out_free_pages; VM_BUG_ON_PAGE(!PageHead(page), page); + /* + * Leave pmd empty until pte is filled note we must notify here as + * concurrent CPU thread might write to new page before the call to + * mmu_notifier_invalidate_range_end() happens which can lead to a + * device seeing memory write in different order than CPU. + * + * See Documentation/vm/mmu_notifier.txt + */ pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); - /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd); pmd_populate(vma->vm_mm, &_pmd, pgtable); @@ -1216,7 +1223,12 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd, page_remove_rmap(page, true); spin_unlock(vmf->ptl); - mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); + /* + * No need to double call mmu_notifier->invalidate_range() callback as + * the above pmdp_huge_clear_flush_notify() did already call it. + */ + mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start, + mmun_end); ret |= VM_FAULT_WRITE; put_page(page); @@ -1365,7 +1377,12 @@ alloc: } spin_unlock(vmf->ptl); out_mn: - mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); + /* + * No need to double call mmu_notifier->invalidate_range() callback as + * the above pmdp_huge_clear_flush_notify() did already call it. + */ + mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start, + mmun_end); out: return ret; out_unlock: @@ -1678,7 +1695,7 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd) pgtable = pgtable_trans_huge_withdraw(mm, pmd); pte_free(mm, pgtable); - atomic_long_dec(&mm->nr_ptes); + mm_dec_nr_ptes(mm); } int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, @@ -2017,7 +2034,12 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, out: spin_unlock(ptl); - mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE); + /* + * No need to double call mmu_notifier->invalidate_range() callback as + * the above pudp_huge_clear_flush_notify() did already call it. + */ + mmu_notifier_invalidate_range_only_end(mm, haddr, haddr + + HPAGE_PUD_SIZE); } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ @@ -2029,8 +2051,15 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pmd_t _pmd; int i; - /* leave pmd empty until pte is filled */ - pmdp_huge_clear_flush_notify(vma, haddr, pmd); + /* + * Leave pmd empty until pte is filled note that it is fine to delay + * notification until mmu_notifier_invalidate_range_end() as we are + * replacing a zero pmd write protected page with a zero pte write + * protected page. + * + * See Documentation/vm/mmu_notifier.txt + */ + pmdp_huge_clear_flush(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); @@ -2085,6 +2114,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR); return; } else if (is_huge_zero_pmd(*pmd)) { + /* + * FIXME: Do we want to invalidate secondary mmu by calling + * mmu_notifier_invalidate_range() see comments below inside + * __split_huge_pmd() ? + * + * We are going from a zero huge page write protected to zero + * small page also write protected so it does not seems useful + * to invalidate secondary mmu at this time. + */ return __split_huge_zero_page_pmd(vma, haddr, pmd); } @@ -2220,7 +2258,21 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, __split_huge_pmd_locked(vma, pmd, haddr, freeze); out: spin_unlock(ptl); - mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE); + /* + * No need to double call mmu_notifier->invalidate_range() callback. + * They are 3 cases to consider inside __split_huge_pmd_locked(): + * 1) pmdp_huge_clear_flush_notify() call invalidate_range() obvious + * 2) __split_huge_zero_page_pmd() read only zero page and any write + * fault will trigger a flush_notify before pointing to a new page + * (it is fine if the secondary mmu keeps pointing to the old zero + * page in the meantime) + * 3) Split a huge pmd into pte pointing to the same page. No need + * to invalidate secondary tlb entry they are all still valid. + * any further changes to individual pte will notify. So no need + * to call mmu_notifier->invalidate_range() + */ + mmu_notifier_invalidate_range_only_end(mm, haddr, haddr + + HPAGE_PMD_SIZE); } void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, |