diff options
-rw-r--r-- | include/linux/mm.h | 18 | ||||
-rw-r--r-- | include/linux/mmzone.h | 5 | ||||
-rw-r--r-- | include/trace/events/huge_memory.h | 3 | ||||
-rw-r--r-- | init/main.c | 1 | ||||
-rw-r--r-- | mm/huge_memory.c | 38 | ||||
-rw-r--r-- | mm/memcontrol.c | 37 | ||||
-rw-r--r-- | mm/mempolicy.c | 10 | ||||
-rw-r--r-- | mm/page-writeback.c | 10 | ||||
-rw-r--r-- | mm/page_alloc.c | 61 | ||||
-rw-r--r-- | mm/shmem.c | 7 | ||||
-rw-r--r-- | mm/slab.c | 4 | ||||
-rw-r--r-- | mm/slab_common.c | 3 | ||||
-rw-r--r-- | mm/slub.c | 2 | ||||
-rw-r--r-- | mm/sparse.c | 9 | ||||
-rw-r--r-- | mm/vmalloc.c | 4 |
15 files changed, 102 insertions, 110 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 80a9162b406c..cfaa8feecfe8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2658,14 +2658,26 @@ static inline bool want_init_on_free(void) !page_poisoning_enabled(); } -#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT -DECLARE_STATIC_KEY_TRUE(_debug_pagealloc_enabled); +#ifdef CONFIG_DEBUG_PAGEALLOC +extern void init_debug_pagealloc(void); #else -DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); +static inline void init_debug_pagealloc(void) {} #endif +extern bool _debug_pagealloc_enabled_early; +DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); static inline bool debug_pagealloc_enabled(void) { + return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && + _debug_pagealloc_enabled_early; +} + +/* + * For use in fast paths after init_debug_pagealloc() has run, or when a + * false negative result is not harmful when called too early. + */ +static inline bool debug_pagealloc_enabled_static(void) +{ if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) return false; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 89d8ff06c9ce..5334ad8fc7bd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -215,9 +215,8 @@ enum node_stat_item { NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */ - NR_SLAB_RECLAIMABLE, /* Please do not reorder this item */ - NR_SLAB_UNRECLAIMABLE, /* and this one without looking at - * memcg_flush_percpu_vmstats() first. */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index dd4db334bd63..d82a0f4e824d 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -31,7 +31,8 @@ EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \ EM( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") \ - EMe(SCAN_TRUNCATED, "truncated") \ + EM( SCAN_TRUNCATED, "truncated") \ + EMe(SCAN_PAGE_HAS_PRIVATE, "page_has_private") \ #undef EM #undef EMe diff --git a/init/main.c b/init/main.c index 2cd736059416..da1bc0b60a7d 100644 --- a/init/main.c +++ b/init/main.c @@ -553,6 +553,7 @@ static void __init mm_init(void) * bigger than MAX_ORDER unless SPARSEMEM. */ page_ext_init_flatmem(); + init_debug_pagealloc(); report_meminit(); mem_init(); kmem_cache_init(); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 41a0fbddc96b..a88093213674 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -527,13 +527,13 @@ void prep_transhuge_page(struct page *page) set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); } -static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, +static unsigned long __thp_get_unmapped_area(struct file *filp, + unsigned long addr, unsigned long len, loff_t off, unsigned long flags, unsigned long size) { - unsigned long addr; loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad; + unsigned long len_pad, ret; if (off_end <= off_align || (off_end - off_align) < size) return 0; @@ -542,30 +542,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le if (len_pad < len || (off + len_pad) < off) return 0; - addr = current->mm->get_unmapped_area(filp, 0, len_pad, + ret = current->mm->get_unmapped_area(filp, addr, len_pad, off >> PAGE_SHIFT, flags); - if (IS_ERR_VALUE(addr)) + + /* + * The failure might be due to length padding. The caller will retry + * without the padding. + */ + if (IS_ERR_VALUE(ret)) return 0; - addr += (off - addr) & (size - 1); - return addr; + /* + * Do not try to align to THP boundary if allocation at the address + * hint succeeds. + */ + if (ret == addr) + return addr; + + ret += (off - ret) & (size - 1); + return ret; } unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + unsigned long ret; loff_t off = (loff_t)pgoff << PAGE_SHIFT; - if (addr) - goto out; if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD)) goto out; - addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE); - if (addr) - return addr; - - out: + ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE); + if (ret) + return ret; +out: return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); } EXPORT_SYMBOL_GPL(thp_get_unmapped_area); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c5b5f74cfd4d..6c83cf4ed970 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3287,49 +3287,34 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, } } -static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only) +static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg) { - unsigned long stat[MEMCG_NR_STAT]; + unsigned long stat[MEMCG_NR_STAT] = {0}; struct mem_cgroup *mi; int node, cpu, i; - int min_idx, max_idx; - - if (slab_only) { - min_idx = NR_SLAB_RECLAIMABLE; - max_idx = NR_SLAB_UNRECLAIMABLE; - } else { - min_idx = 0; - max_idx = MEMCG_NR_STAT; - } - - for (i = min_idx; i < max_idx; i++) - stat[i] = 0; for_each_online_cpu(cpu) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < MEMCG_NR_STAT; i++) stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu); for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < MEMCG_NR_STAT; i++) atomic_long_add(stat[i], &mi->vmstats[i]); - if (!slab_only) - max_idx = NR_VM_NODE_STAT_ITEMS; - for_each_node(node) { struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; struct mem_cgroup_per_node *pi; - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) stat[i] = 0; for_each_online_cpu(cpu) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) stat[i] += per_cpu( pn->lruvec_stat_cpu->count[i], cpu); for (pi = pn; pi; pi = parent_nodeinfo(pi, node)) - for (i = min_idx; i < max_idx; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) atomic_long_add(stat[i], &pi->lruvec_stat[i]); } } @@ -3403,13 +3388,9 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) parent = root_mem_cgroup; /* - * Deactivate and reparent kmem_caches. Then flush percpu - * slab statistics to have precise values at the parent and - * all ancestor levels. It's required to keep slab stats - * accurate after the reparenting of kmem_caches. + * Deactivate and reparent kmem_caches. */ memcg_deactivate_kmem_caches(memcg, parent); - memcg_flush_percpu_vmstats(memcg, true); kmemcg_id = memcg->kmemcg_id; BUG_ON(kmemcg_id < 0); @@ -4913,7 +4894,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg) * Flush percpu vmstats and vmevents to guarantee the value correctness * on parent's and all ancestor levels. */ - memcg_flush_percpu_vmstats(memcg, false); + memcg_flush_percpu_vmstats(memcg); memcg_flush_percpu_vmevents(memcg); __mem_cgroup_free(memcg); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 067cf7d3daf5..b2920ae87a61 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2148,18 +2148,22 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); + /* + * First, try to allocate THP only on local node, but + * don't reclaim unnecessarily, just compact. + */ page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); + gfp | __GFP_THISNODE | __GFP_NORETRY, order); /* * If hugepage allocations are configured to always * synchronous compact or the vma has been madvised * to prefer hugepage backing, retry allowing remote - * memory as well. + * memory with both reclaim and compact as well. */ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) page = __alloc_pages_node(hpage_node, - gfp | __GFP_NORETRY, order); + gfp, order); goto out; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 50055d2e4ea8..2caf780a42e7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -201,11 +201,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, if (this_bw < tot_bw) { if (min) { min *= this_bw; - do_div(min, tot_bw); + min = div64_ul(min, tot_bw); } if (max < 100) { max *= this_bw; - do_div(max, tot_bw); + max = div64_ul(max, tot_bw); } } @@ -766,7 +766,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) struct wb_domain *dom = dtc_dom(dtc); unsigned long thresh = dtc->thresh; u64 wb_thresh; - long numerator, denominator; + unsigned long numerator, denominator; unsigned long wb_min_ratio, wb_max_ratio; /* @@ -777,7 +777,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; wb_thresh *= numerator; - do_div(wb_thresh, denominator); + wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); @@ -1102,7 +1102,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, bw = written - min(written, wb->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { - do_div(bw, elapsed); + bw = div64_ul(bw, elapsed); avg = bw; goto out; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4785a8a2040e..d047bf7d8fd4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -694,34 +694,27 @@ void prep_compound_page(struct page *page, unsigned int order) #ifdef CONFIG_DEBUG_PAGEALLOC unsigned int _debug_guardpage_minorder; -#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT -DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled); -#else +bool _debug_pagealloc_enabled_early __read_mostly + = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); +EXPORT_SYMBOL(_debug_pagealloc_enabled_early); DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); -#endif EXPORT_SYMBOL(_debug_pagealloc_enabled); DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled); static int __init early_debug_pagealloc(char *buf) { - bool enable = false; - - if (kstrtobool(buf, &enable)) - return -EINVAL; - - if (enable) - static_branch_enable(&_debug_pagealloc_enabled); - - return 0; + return kstrtobool(buf, &_debug_pagealloc_enabled_early); } early_param("debug_pagealloc", early_debug_pagealloc); -static void init_debug_guardpage(void) +void init_debug_pagealloc(void) { if (!debug_pagealloc_enabled()) return; + static_branch_enable(&_debug_pagealloc_enabled); + if (!debug_guardpage_minorder()) return; @@ -1186,7 +1179,7 @@ static __always_inline bool free_pages_prepare(struct page *page, */ arch_free_page(page, order); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) kernel_map_pages(page, 1 << order, 0); kasan_free_nondeferred_pages(page, order); @@ -1207,7 +1200,7 @@ static bool free_pcp_prepare(struct page *page) static bool bulkfree_pcp_prepare(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return free_pages_check(page); else return false; @@ -1221,7 +1214,7 @@ static bool bulkfree_pcp_prepare(struct page *page) */ static bool free_pcp_prepare(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return free_pages_prepare(page, 0, true); else return free_pages_prepare(page, 0, false); @@ -1973,10 +1966,6 @@ void __init page_alloc_init_late(void) for_each_populated_zone(zone) set_zone_contiguous(zone); - -#ifdef CONFIG_DEBUG_PAGEALLOC - init_debug_guardpage(); -#endif } #ifdef CONFIG_CMA @@ -2106,7 +2095,7 @@ static inline bool free_pages_prezeroed(void) */ static inline bool check_pcp_refill(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return check_new_page(page); else return false; @@ -2128,7 +2117,7 @@ static inline bool check_pcp_refill(struct page *page) } static inline bool check_new_pcp(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return check_new_page(page); else return false; @@ -2155,7 +2144,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_refcounted(page); arch_alloc_page(page, order); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) kernel_map_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); kernel_poison_pages(page, 1 << order, 1); @@ -4476,8 +4465,11 @@ retry_cpuset: if (page) goto got_pg; - if (order >= pageblock_order && (gfp_mask & __GFP_IO) && - !(gfp_mask & __GFP_RETRY_MAYFAIL)) { + /* + * Checks for costly allocations with __GFP_NORETRY, which + * includes some THP page fault allocations + */ + if (costly_order && (gfp_mask & __GFP_NORETRY)) { /* * If allocating entire pageblock(s) and compaction * failed because all zones are below low watermarks @@ -4498,23 +4490,6 @@ retry_cpuset: if (compact_result == COMPACT_SKIPPED || compact_result == COMPACT_DEFERRED) goto nopage; - } - - /* - * Checks for costly allocations with __GFP_NORETRY, which - * includes THP page fault allocations - */ - if (costly_order && (gfp_mask & __GFP_NORETRY)) { - /* - * If compaction is deferred for high-order allocations, - * it is because sync compaction recently failed. If - * this is the case and the caller requested a THP - * allocation, we do not want to heavily disrupt the - * system, so we fail the allocation instead of entering - * direct reclaim. - */ - if (compact_result == COMPACT_DEFERRED) - goto nopage; /* * Looks like reclaim/compaction is worth trying, but diff --git a/mm/shmem.c b/mm/shmem.c index 165fa6332993..8793e8cc1a48 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2107,9 +2107,10 @@ unsigned long shmem_get_unmapped_area(struct file *file, /* * Our priority is to support MAP_SHARED mapped hugely; * and support MAP_PRIVATE mapped hugely too, until it is COWed. - * But if caller specified an address hint, respect that as before. + * But if caller specified an address hint and we allocated area there + * successfully, respect that as before. */ - if (uaddr) + if (uaddr == addr) return addr; if (shmem_huge != SHMEM_HUGE_FORCE) { @@ -2143,7 +2144,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, if (inflated_len < len) return addr; - inflated_addr = get_area(NULL, 0, inflated_len, 0, flags); + inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); if (IS_ERR_VALUE(inflated_addr)) return addr; if (inflated_addr & ~PAGE_MASK) diff --git a/mm/slab.c b/mm/slab.c index f1e1840af533..a89633603b2d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1416,7 +1416,7 @@ static void kmem_rcu_free(struct rcu_head *head) #if DEBUG static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) { - if (debug_pagealloc_enabled() && OFF_SLAB(cachep) && + if (debug_pagealloc_enabled_static() && OFF_SLAB(cachep) && (cachep->size % PAGE_SIZE) == 0) return true; @@ -2008,7 +2008,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags) * to check size >= 256. It guarantees that all necessary small * sized slab is initialized in current slab initialization sequence. */ - if (debug_pagealloc_enabled() && (flags & SLAB_POISON) && + if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) && size >= 256 && cachep->object_size > cache_line_size()) { if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { size_t tmp_size = ALIGN(size, PAGE_SIZE); diff --git a/mm/slab_common.c b/mm/slab_common.c index f0ab6d4ceb4c..0d95ddea13b0 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -903,7 +903,8 @@ static void flush_memcg_workqueue(struct kmem_cache *s) * deactivates the memcg kmem_caches through workqueue. Make sure all * previous workitems on workqueue are processed. */ - flush_workqueue(memcg_kmem_cache_wq); + if (likely(memcg_kmem_cache_wq)) + flush_workqueue(memcg_kmem_cache_wq); /* * If we're racing with children kmem_cache deactivation, it might diff --git a/mm/slub.c b/mm/slub.c index d11389710b12..8eafccf75940 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -288,7 +288,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) unsigned long freepointer_addr; void *p; - if (!debug_pagealloc_enabled()) + if (!debug_pagealloc_enabled_static()) return get_freepointer(s, object); freepointer_addr = (unsigned long)object + s->offset; diff --git a/mm/sparse.c b/mm/sparse.c index b20ab7cdac86..3822ecbd8a1f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -777,7 +777,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { unsigned long section_nr = pfn_to_section_nr(pfn); - if (!section_is_early) { + /* + * When removing an early section, the usage map is kept (as the + * usage maps of other sections fall into the same page). It + * will be re-used when re-adding the section - which is then no + * longer an early section. If the usage map is PageReserved, it + * was allocated during boot. + */ + if (!PageReserved(virt_to_page(ms->usage))) { kfree(ms->usage); ms->usage = NULL; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e9681dc4aa75..b29ad17edcf5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1383,7 +1383,7 @@ static void free_unmap_vmap_area(struct vmap_area *va) { flush_cache_vunmap(va->va_start, va->va_end); unmap_vmap_area(va); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(va->va_start, va->va_end); free_vmap_area_noflush(va); @@ -1681,7 +1681,7 @@ static void vb_free(const void *addr, unsigned long size) vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range((unsigned long)addr, (unsigned long)addr + size); |