diff options
author | David S. Miller <davem@davemloft.net> | 2019-04-25 23:52:29 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-04-25 23:52:29 -0400 |
commit | 8b4483658364f05b2e32845c8f445cdfd9452286 (patch) | |
tree | 4734ef83378b1e90475ec4776ac72dfe16a7249b /mm | |
parent | c049d56eb219661c9ae48d596c3e633973f89d1f (diff) | |
parent | cd8dead0c39457e58ec1d36db93aedca811d48f1 (diff) | |
download | linux-8b4483658364f05b2e32845c8f445cdfd9452286.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Two easy cases of overlapping changes.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/kmemleak.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 7 | ||||
-rw-r--r-- | mm/page_alloc.c | 30 | ||||
-rw-r--r-- | mm/percpu.c | 8 | ||||
-rw-r--r-- | mm/shmem.c | 58 | ||||
-rw-r--r-- | mm/slab.c | 1 | ||||
-rw-r--r-- | mm/swapfile.c | 32 | ||||
-rw-r--r-- | mm/vmscan.c | 29 | ||||
-rw-r--r-- | mm/vmstat.c | 5 |
9 files changed, 83 insertions, 89 deletions
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 6c318f5ac234..2e435b8142e5 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1401,6 +1401,7 @@ static void scan_block(void *_start, void *_end, /* * Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency. */ +#ifdef CONFIG_SMP static void scan_large_block(void *start, void *end) { void *next; @@ -1412,6 +1413,7 @@ static void scan_large_block(void *start, void *end) cond_resched(); } } +#endif /* * Scan a memory block corresponding to a kmemleak_object. A condition is diff --git a/mm/mmap.c b/mm/mmap.c index 41eb48d9b527..bd7b9f293b39 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -45,6 +45,7 @@ #include <linux/moduleparam.h> #include <linux/pkeys.h> #include <linux/oom.h> +#include <linux/sched/mm.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> @@ -2525,7 +2526,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + /* don't alter vm_end if the coredump is running */ + if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); @@ -2551,6 +2553,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return vma; if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; + /* don't alter vm_start if the coredump is running */ + if (!mmget_still_valid(mm)) + return NULL; start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d96ca5bc555b..c6ce20aaf80b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8005,7 +8005,10 @@ void *__init alloc_large_system_hash(const char *tablename, bool has_unmovable_pages(struct zone *zone, struct page *page, int count, int migratetype, int flags) { - unsigned long pfn, iter, found; + unsigned long found; + unsigned long iter = 0; + unsigned long pfn = page_to_pfn(page); + const char *reason = "unmovable page"; /* * TODO we could make this much more efficient by not checking every @@ -8015,17 +8018,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * can still lead to having bootmem allocations in zone_movable. */ - /* - * CMA allocations (alloc_contig_range) really need to mark isolate - * CMA pageblocks even when they are not movable in fact so consider - * them movable here. - */ - if (is_migrate_cma(migratetype) && - is_migrate_cma(get_pageblock_migratetype(page))) - return false; + if (is_migrate_cma_page(page)) { + /* + * CMA allocations (alloc_contig_range) really need to mark + * isolate CMA pageblocks even when they are not movable in fact + * so consider them movable here. + */ + if (is_migrate_cma(migratetype)) + return false; + + reason = "CMA page"; + goto unmovable; + } - pfn = page_to_pfn(page); - for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { + for (found = 0; iter < pageblock_nr_pages; iter++) { unsigned long check = pfn + iter; if (!pfn_valid_within(check)) @@ -8105,7 +8111,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unmovable: WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); if (flags & REPORT_FAILURE) - dump_page(pfn_to_page(pfn+iter), "unmovable page"); + dump_page(pfn_to_page(pfn + iter), reason); return true; } diff --git a/mm/percpu.c b/mm/percpu.c index 2e6fc8d552c9..68dd2e7e73b5 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2567,8 +2567,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, ai->groups[group].base_offset = areas[group] - base; } - pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n", + PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); @@ -2692,8 +2692,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, ai->static_size, + pr_info("%d %s pages/cpu s%zu r%zu d%zu\n", + unit_pages, psize_str, ai->static_size, ai->reserved_size, ai->dyn_size); rc = pcpu_setup_first_chunk(ai, vm.addr); diff --git a/mm/shmem.c b/mm/shmem.c index b3db3779a30a..2275a0ff7c30 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode) } spin_unlock(&sbinfo->shrinklist_lock); } - if (!list_empty(&info->swaplist)) { + while (!list_empty(&info->swaplist)) { + /* Wait while shmem_unuse() is scanning this inode... */ + wait_var_event(&info->stop_eviction, + !atomic_read(&info->stop_eviction)); mutex_lock(&shmem_swaplist_mutex); - list_del_init(&info->swaplist); + /* ...but beware of the race if we peeked too early */ + if (!atomic_read(&info->stop_eviction)) + list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } } @@ -1099,10 +1104,11 @@ extern struct swap_info_struct *swap_info[]; static int shmem_find_swap_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices, - bool frontswap) + unsigned int type, bool frontswap) { XA_STATE(xas, &mapping->i_pages, start); struct page *page; + swp_entry_t entry; unsigned int ret = 0; if (!nr_entries) @@ -1116,13 +1122,12 @@ static int shmem_find_swap_entries(struct address_space *mapping, if (!xa_is_value(page)) continue; - if (frontswap) { - swp_entry_t entry = radix_to_swp_entry(page); - - if (!frontswap_test(swap_info[swp_type(entry)], - swp_offset(entry))) - continue; - } + entry = radix_to_swp_entry(page); + if (swp_type(entry) != type) + continue; + if (frontswap && + !frontswap_test(swap_info[type], swp_offset(entry))) + continue; indices[ret] = xas.xa_index; entries[ret] = page; @@ -1194,7 +1199,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type, pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, pvec.pages, indices, - frontswap); + type, frontswap); if (pvec.nr == 0) { ret = 0; break; @@ -1227,36 +1232,27 @@ int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse) { struct shmem_inode_info *info, *next; - struct inode *inode; - struct inode *prev_inode = NULL; int error = 0; if (list_empty(&shmem_swaplist)) return 0; mutex_lock(&shmem_swaplist_mutex); - - /* - * The extra refcount on the inode is necessary to safely dereference - * p->next after re-acquiring the lock. New shmem inodes with swap - * get added to the end of the list and we will scan them all. - */ list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { if (!info->swapped) { list_del_init(&info->swaplist); continue; } - - inode = igrab(&info->vfs_inode); - if (!inode) - continue; - + /* + * Drop the swaplist mutex while searching the inode for swap; + * but before doing so, make sure shmem_evict_inode() will not + * remove placeholder inode from swaplist, nor let it be freed + * (igrab() would protect from unlink, but not from unmount). + */ + atomic_inc(&info->stop_eviction); mutex_unlock(&shmem_swaplist_mutex); - if (prev_inode) - iput(prev_inode); - prev_inode = inode; - error = shmem_unuse_inode(inode, type, frontswap, + error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, fs_pages_to_unuse); cond_resched(); @@ -1264,14 +1260,13 @@ int shmem_unuse(unsigned int type, bool frontswap, next = list_next_entry(info, swaplist); if (!info->swapped) list_del_init(&info->swaplist); + if (atomic_dec_and_test(&info->stop_eviction)) + wake_up_var(&info->stop_eviction); if (error) break; } mutex_unlock(&shmem_swaplist_mutex); - if (prev_inode) - iput(prev_inode); - return error; } @@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + atomic_set(&info->stop_eviction, 0); info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->shrinklist); diff --git a/mm/slab.c b/mm/slab.c index 47a380a486ee..9142ee992493 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2374,7 +2374,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { diff --git a/mm/swapfile.c b/mm/swapfile.c index 2b8d9c3fbb47..cf63b5f01adf 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2023,7 +2023,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * If the boolean frontswap is true, only unuse pages_to_unuse pages; * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -#define SWAP_UNUSE_MAX_TRIES 3 int try_to_unuse(unsigned int type, bool frontswap, unsigned long pages_to_unuse) { @@ -2035,7 +2034,6 @@ int try_to_unuse(unsigned int type, bool frontswap, struct page *page; swp_entry_t entry; unsigned int i; - int retries = 0; if (!si->inuse_pages) return 0; @@ -2053,11 +2051,9 @@ retry: spin_lock(&mmlist_lock); p = &init_mm.mmlist; - while ((p = p->next) != &init_mm.mmlist) { - if (signal_pending(current)) { - retval = -EINTR; - break; - } + while (si->inuse_pages && + !signal_pending(current) && + (p = p->next) != &init_mm.mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!mmget_not_zero(mm)) @@ -2084,7 +2080,9 @@ retry: mmput(prev_mm); i = 0; - while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { + while (si->inuse_pages && + !signal_pending(current) && + (i = find_next_to_unuse(si, i, frontswap)) != 0) { entry = swp_entry(type, i); page = find_get_page(swap_address_space(entry), i); @@ -2117,14 +2115,18 @@ retry: * If yes, we would need to do retry the unuse logic again. * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. - * Its not worth continuosuly retrying to unuse the swap in this case. - * So we try SWAP_UNUSE_MAX_TRIES times. + * + * Limit the number of retries? No: when mmget_not_zero() above fails, + * that mm is likely to be freeing swap from exit_mmap(), which proceeds + * at its own independent pace; and even shmem_writepage() could have + * been preempted after get_swap_page(), temporarily hiding that swap. + * It's easy and robust (though cpu-intensive) just to keep retrying. */ - if (++retries >= SWAP_UNUSE_MAX_TRIES) - retval = -EBUSY; - else if (si->inuse_pages) - goto retry; - + if (si->inuse_pages) { + if (!signal_pending(current)) + goto retry; + retval = -EINTR; + } out: return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval; } diff --git a/mm/vmscan.c b/mm/vmscan.c index a5ad0b35ab8e..a815f73ee4d5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2176,7 +2176,6 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct mem_cgroup *memcg, struct scan_control *sc, bool actual_reclaim) { enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; @@ -2197,16 +2196,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - /* * When refaults are being observed, it means a new workingset * is being established. Disable active list protection to get * rid of the stale workingset quickly. */ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); if (file && actual_reclaim && lruvec->refaults != refaults) { inactive_ratio = 0; } else { @@ -2227,12 +2222,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct mem_cgroup *memcg, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), - memcg, sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2332,7 +2325,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anonymous pages on the LRU in eligible zones. * Otherwise, the small LRU gets thrashed. */ - if (!inactive_list_is_low(lruvec, false, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, false, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_ANON; @@ -2350,7 +2343,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, true, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2503,7 +2496,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, memcg, sc); + lruvec, sc); } } @@ -2570,7 +2563,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2969,12 +2962,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) unsigned long refaults; struct lruvec *lruvec; - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - lruvec = mem_cgroup_lruvec(pgdat, memcg); + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); lruvec->refaults = refaults; } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } @@ -3339,7 +3328,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); diff --git a/mm/vmstat.c b/mm/vmstat.c index 36b56f858f0f..a7d493366a65 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1274,13 +1274,8 @@ const char * const vmstat_text[] = { #endif #endif /* CONFIG_MEMORY_BALLOON */ #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#else - "", /* nr_tlb_remote_flush */ - "", /* nr_tlb_remote_flush_received */ -#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ |