author     Linus Torvalds <torvalds@linux-foundation.org>   2017-05-13 09:49:35 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-05-13 09:49:35 -0700
commit     1251704a631b62591ad1d1b6ead252e9e597d5f5 (patch)
tree       bc394a069d3b8aef8c6dc147438c05cc9ba057aa /mm
parent     0fcc3ab23d7395f58e8ab0834e7913e2e4314a83 (diff)
parent     b340959ea281dbac15344277094d0a294dbe8aca (diff)
download   linux-1251704a631b62591ad1d1b6ead252e9e597d5f5.tar.bz2
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"15 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm, docs: update memory.stat description with workingset* entries
mm: vmscan: scan until it finds eligible pages
mm, thp: copying user pages must schedule on collapse
dax: fix PMD data corruption when fault races with write
dax: fix data corruption when fault races with write
ext4: return to starting transaction in ext4_dax_huge_fault()
mm: fix data corruption due to stale mmap reads
dax: prevent invalidation of mapped DAX entries
Tigran has moved
mm, vmalloc: fix vmalloc users tracking properly
mm/khugepaged: add missed tracepoint for collapse_huge_page_swapin
gcov: support GCC 7.1
mm, vmstat: Remove spurious WARN() during zoneinfo print
time: delete current_fs_time()
hwpoison, memcg: forcibly uncharge LRU pages
Diffstat (limited to 'mm')
-rw-r--r--   mm/khugepaged.c      | 11
-rw-r--r--   mm/memcontrol.c      |  2
-rw-r--r--   mm/memory-failure.c  |  7
-rw-r--r--   mm/truncate.c        | 21
-rw-r--r--   mm/util.c            |  3
-rw-r--r--   mm/vmalloc.c         | 19
-rw-r--r--   mm/vmscan.c          | 21
-rw-r--r--   mm/vmstat.c          |  2
8 files changed, 63 insertions, 23 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7cb9c88bb4a3..945fd1ca49b5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -612,7 +612,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 				      spinlock_t *ptl)
 {
 	pte_t *_pte;
-	for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+	     _pte++, page++, address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		struct page *src_page;
 
@@ -651,9 +652,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
 		}
-
-		address += PAGE_SIZE;
-		page++;
+		cond_resched();
 	}
 }
 
@@ -907,8 +906,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 				return false;
 			}
 			/* check if the pmd is still valid */
-			if (mm_find_pmd(mm, address) != pmd)
+			if (mm_find_pmd(mm, address) != pmd) {
+				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
+			}
 		}
 		if (ret & VM_FAULT_ERROR) {
 			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff73899af61a..94172089f52f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5528,7 +5528,7 @@ static void uncharge_list(struct list_head *page_list)
 		next = page->lru.next;
 
 		VM_BUG_ON_PAGE(PageLRU(page), page);
-		VM_BUG_ON_PAGE(page_count(page), page);
+		VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
 
 		if (!page->mem_cgroup)
 			continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 73066b80d14a..2527dfeddb00 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -539,6 +539,13 @@ static int delete_from_lru_cache(struct page *p)
 		 */
 		ClearPageActive(p);
 		ClearPageUnevictable(p);
+
+		/*
+		 * Poisoned page might never drop its ref count to 0 so we have
+		 * to uncharge it manually from its memcg.
+		 */
+		mem_cgroup_uncharge(p);
+
 		/*
 		 * drop the page count elevated by isolate_lru_page()
 		 */
diff --git a/mm/truncate.c b/mm/truncate.c
index 83a059e8cd1d..6479ed2afc53 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping,
 
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
  */
 static int invalidate_exceptional_entry(struct address_space *mapping,
 					pgoff_t index, void *entry)
 {
-	/* Handled by shmem itself */
-	if (shmem_mapping(mapping))
+	/* Handled by shmem itself, or for DAX we do nothing. */
+	if (shmem_mapping(mapping) || dax_mapping(mapping))
 		return 1;
-	if (dax_mapping(mapping))
-		return dax_invalidate_mapping_entry(mapping, index);
 	clear_shadow_entry(mapping, index, entry);
 	return 1;
 }
@@ -689,7 +686,17 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		cond_resched();
 		index++;
 	}
-
+	/*
+	 * For DAX we invalidate page tables after invalidating radix tree. We
+	 * could invalidate page tables while invalidating each entry however
+	 * that would be expensive. And doing range unmapping before doesn't
+	 * work as we have no cheap way to find whether radix tree entry didn't
+	 * get remapped later.
+	 */
+	if (dax_mapping(mapping)) {
+		unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+				    (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+	}
 out:
 	cleancache_invalidate_inode(mapping);
 	return ret;
diff --git a/mm/util.c b/mm/util.c
index 718154debc87..464df3489903 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -382,7 +382,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	if (ret || size <= PAGE_SIZE)
 		return ret;
 
-	return __vmalloc_node_flags(size, node, flags);
+	return __vmalloc_node_flags_caller(size, node, flags,
+			__builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 194c22eccb9d..34a1c3e46ed7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1649,6 +1649,9 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+			    gfp_t gfp_mask, pgprot_t prot,
+			    int node, const void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node)
 {
@@ -1791,7 +1794,7 @@ fail:
  *	with mm people.
  *
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
 			    int node, const void *caller)
 {
@@ -1806,6 +1809,20 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 }
 EXPORT_SYMBOL(__vmalloc);
 
+static inline void *__vmalloc_node_flags(unsigned long size,
+					int node, gfp_t flags)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+					node, __builtin_return_address(0));
+}
+
+
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+				  void *caller)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
+
 /**
  * vmalloc - allocate virtually contiguous memory
  * @size: allocation size
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2f45c0520f43..8ad39bbc79e6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1449,7 +1449,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
  *
  * Appropriate locks must be held before calling this function.
  *
- * @nr_to_scan:	The number of pages to look through on the list.
+ * @nr_to_scan:	The number of eligible pages to look through on the list.
  * @lruvec:	The LRU vector to pull pages from.
  * @dst:	The temp list to put pages on to.
  * @nr_scanned:	The number of pages that were scanned.
@@ -1469,11 +1469,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
 	unsigned long skipped = 0;
-	unsigned long scan, nr_pages;
+	unsigned long scan, total_scan, nr_pages;
 	LIST_HEAD(pages_skipped);
 
-	for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-					!list_empty(src); scan++) {
+	scan = 0;
+	for (total_scan = 0;
+	     scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+	     total_scan++) {
 		struct page *page;
 
 		page = lru_to_page(src);
@@ -1487,6 +1489,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			continue;
 		}
 
+		/*
+		 * Do not count skipped pages because that makes the function
+		 * return with no isolated pages if the LRU mostly contains
+		 * ineligible pages. This causes the VM to not reclaim any
+		 * pages, triggering a premature OOM.
+		 */
+		scan++;
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
 			nr_pages = hpage_nr_pages(page);
@@ -1524,9 +1533,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			skipped += nr_skipped[zid];
 		}
 	}
-	*nr_scanned = scan;
+	*nr_scanned = total_scan;
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-				    scan, skipped, nr_taken, mode, lru);
+				    total_scan, skipped, nr_taken, mode, lru);
 	update_lru_sizes(lruvec, lru, nr_zone_taken);
 	return nr_taken;
 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f5fa1bd1eb16..76f73670200a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1359,8 +1359,6 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 			return zone == compare;
 	}
 
-	/* The zone must be somewhere! */
-	WARN_ON_ONCE(1);
 	return false;
 }
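
The scan-accounting change to isolate_lru_pages() above is easiest to see in isolation. Below is a minimal, self-contained userspace sketch — not kernel code — of the new counting scheme; struct fake_page and isolate_sketch() are made-up stand-ins for struct page and the real LRU walk. It shows why counting only eligible pages against nr_to_scan avoids returning zero isolated pages when the front of the LRU is dominated by ineligible pages (the premature-OOM case the in-diff comment describes), while total_scan still reports every page visited, as *nr_scanned and the tracepoint do.

#include <stdio.h>

struct fake_page { int zone; };	/* toy stand-in for struct page */

/*
 * Toy model of the counting scheme: pages above reclaim_zone are skipped
 * without consuming the nr_to_scan budget, while *total_scan records every
 * page visited. Returns the number of pages "isolated".
 */
static unsigned long isolate_sketch(const struct fake_page *lru,
				    unsigned long lru_len, int reclaim_zone,
				    unsigned long nr_to_scan,
				    unsigned long *total_scan)
{
	unsigned long scan = 0, taken = 0, i;

	*total_scan = 0;
	for (i = 0; i < lru_len && scan < nr_to_scan; i++) {
		(*total_scan)++;
		if (lru[i].zone > reclaim_zone)
			continue;	/* ineligible: skipped, budget untouched */
		scan++;			/* only eligible pages consume the budget */
		taken++;		/* pretend isolation always succeeds */
	}
	return taken;
}

int main(void)
{
	/* 28 ineligible (zone 1) pages queued in front of 4 eligible (zone 0). */
	struct fake_page lru[32];
	unsigned long total, taken, i;

	for (i = 0; i < 32; i++)
		lru[i].zone = (i < 28) ? 1 : 0;

	/*
	 * With a budget of 16: counting every visited page (the old scheme)
	 * would stop after 16 ineligible pages and isolate nothing; counting
	 * only eligible pages reaches the tail and isolates all 4.
	 */
	taken = isolate_sketch(lru, 32, 0, 16, &total);
	printf("taken=%lu total_scan=%lu\n", taken, total);	/* taken=4 total_scan=32 */
	return 0;
}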