diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 255 |
1 files changed, 165 insertions, 90 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3b13d3914176..e515bfcf7f28 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/page_alloc.c * @@ -49,7 +50,6 @@ #include <linux/backing-dev.h> #include <linux/fault-inject.h> #include <linux/page-isolation.h> -#include <linux/page_ext.h> #include <linux/debugobjects.h> #include <linux/kmemleak.h> #include <linux/compaction.h> @@ -135,6 +135,55 @@ unsigned long totalcma_pages __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; +#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON +DEFINE_STATIC_KEY_TRUE(init_on_alloc); +#else +DEFINE_STATIC_KEY_FALSE(init_on_alloc); +#endif +EXPORT_SYMBOL(init_on_alloc); + +#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON +DEFINE_STATIC_KEY_TRUE(init_on_free); +#else +DEFINE_STATIC_KEY_FALSE(init_on_free); +#endif +EXPORT_SYMBOL(init_on_free); + +static int __init early_init_on_alloc(char *buf) +{ + int ret; + bool bool_result; + + if (!buf) + return -EINVAL; + ret = kstrtobool(buf, &bool_result); + if (bool_result && page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n"); + if (bool_result) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + return ret; +} +early_param("init_on_alloc", early_init_on_alloc); + +static int __init early_init_on_free(char *buf) +{ + int ret; + bool bool_result; + + if (!buf) + return -EINVAL; + ret = kstrtobool(buf, &bool_result); + if (bool_result && page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n"); + if (bool_result) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + return ret; +} +early_param("init_on_free", early_init_on_free); /* * A cached value of the page's pageblock's migratetype, used when the page is @@ -223,8 +272,6 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = { [ZONE_MOVABLE] = 0, }; -EXPORT_SYMBOL(totalram_pages); - static char * const zone_names[MAX_NR_ZONES] = { #ifdef CONFIG_ZONE_DMA "DMA", @@ -645,30 +692,29 @@ void prep_compound_page(struct page *page, unsigned int order) #ifdef CONFIG_DEBUG_PAGEALLOC unsigned int _debug_guardpage_minorder; -bool _debug_pagealloc_enabled __read_mostly - = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); + +#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT +DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled); +#else +DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); +#endif EXPORT_SYMBOL(_debug_pagealloc_enabled); -bool _debug_guardpage_enabled __read_mostly; + +DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled); static int __init early_debug_pagealloc(char *buf) { - if (!buf) - return -EINVAL; - return kstrtobool(buf, &_debug_pagealloc_enabled); -} -early_param("debug_pagealloc", early_debug_pagealloc); + bool enable = false; -static bool need_debug_guardpage(void) -{ - /* If we don't use debug_pagealloc, we don't need guard page */ - if (!debug_pagealloc_enabled()) - return false; + if (kstrtobool(buf, &enable)) + return -EINVAL; - if (!debug_guardpage_minorder()) - return false; + if (enable) + static_branch_enable(&_debug_pagealloc_enabled); - return true; + return 0; } +early_param("debug_pagealloc", early_debug_pagealloc); static void init_debug_guardpage(void) { @@ -678,14 +724,9 @@ static void init_debug_guardpage(void) if (!debug_guardpage_minorder()) return; - _debug_guardpage_enabled = true; + static_branch_enable(&_debug_guardpage_enabled); } -struct page_ext_operations debug_guardpage_ops = { - .need = need_debug_guardpage, - .init = init_debug_guardpage, -}; - static int __init debug_guardpage_minorder_setup(char *buf) { unsigned long res; @@ -703,20 +744,13 @@ early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup); static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { - struct page_ext *page_ext; - if (!debug_guardpage_enabled()) return false; if (order >= debug_guardpage_minorder()) return false; - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return false; - - __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); - + __SetPageGuard(page); INIT_LIST_HEAD(&page->lru); set_page_private(page, order); /* Guard pages are not available for any usage */ @@ -728,23 +762,16 @@ static inline bool set_page_guard(struct zone *zone, struct page *page, static inline void clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { - struct page_ext *page_ext; - if (!debug_guardpage_enabled()) return; - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return; - - __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); + __ClearPageGuard(page); set_page_private(page, 0); if (!is_migrate_isolate(migratetype)) __mod_zone_freepage_state(zone, (1 << order), migratetype); } #else -struct page_ext_operations debug_guardpage_ops; static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, @@ -1089,6 +1116,14 @@ out: return ret; } +static void kernel_init_free_pages(struct page *page, int numpages) +{ + int i; + + for (i = 0; i < numpages; i++) + clear_highpage(page + i); +} + static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free) { @@ -1140,6 +1175,9 @@ static __always_inline bool free_pages_prepare(struct page *page, PAGE_SIZE << order); } arch_free_page(page, order); + if (want_init_on_free()) + kernel_init_free_pages(page, 1 << order); + kernel_poison_pages(page, 1 << order, 0); if (debug_pagealloc_enabled()) kernel_map_pages(page, 1 << order, 0); @@ -1150,19 +1188,36 @@ static __always_inline bool free_pages_prepare(struct page *page, } #ifdef CONFIG_DEBUG_VM -static inline bool free_pcp_prepare(struct page *page) +/* + * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed + * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when + * moved from pcp lists to free lists. + */ +static bool free_pcp_prepare(struct page *page) { return free_pages_prepare(page, 0, true); } -static inline bool bulkfree_pcp_prepare(struct page *page) +static bool bulkfree_pcp_prepare(struct page *page) { - return false; + if (debug_pagealloc_enabled()) + return free_pages_check(page); + else + return false; } #else +/* + * With DEBUG_VM disabled, order-0 pages being freed are checked only when + * moving from pcp lists to free list in order to reduce overhead. With + * debug_pagealloc enabled, they are checked also immediately when being freed + * to the pcp lists. + */ static bool free_pcp_prepare(struct page *page) { - return free_pages_prepare(page, 0, false); + if (debug_pagealloc_enabled()) + return free_pages_prepare(page, 0, true); + else + return free_pages_prepare(page, 0, false); } static bool bulkfree_pcp_prepare(struct page *page) @@ -1825,7 +1880,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order) first_deferred_pfn)) { pgdat->first_deferred_pfn = ULONG_MAX; pgdat_resize_unlock(pgdat, &flags); - return true; + /* Retry only once. */ + return first_deferred_pfn != ULONG_MAX; } /* @@ -1902,6 +1958,10 @@ void __init page_alloc_init_late(void) for_each_populated_zone(zone) set_zone_contiguous(zone); + +#ifdef CONFIG_DEBUG_PAGEALLOC + init_debug_guardpage(); +#endif } #ifdef CONFIG_CMA @@ -2019,28 +2079,44 @@ static inline int check_new_page(struct page *page) static inline bool free_pages_prezeroed(void) { - return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && - page_poisoning_enabled(); + return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && + page_poisoning_enabled()) || want_init_on_free(); } #ifdef CONFIG_DEBUG_VM -static bool check_pcp_refill(struct page *page) +/* + * With DEBUG_VM enabled, order-0 pages are checked for expected state when + * being allocated from pcp lists. With debug_pagealloc also enabled, they are + * also checked when pcp lists are refilled from the free lists. + */ +static inline bool check_pcp_refill(struct page *page) { - return false; + if (debug_pagealloc_enabled()) + return check_new_page(page); + else + return false; } -static bool check_new_pcp(struct page *page) +static inline bool check_new_pcp(struct page *page) { return check_new_page(page); } #else -static bool check_pcp_refill(struct page *page) +/* + * With DEBUG_VM disabled, free order-0 pages are checked for expected state + * when pcp lists are being refilled from the free lists. With debug_pagealloc + * enabled, they are also checked when being allocated from the pcp lists. + */ +static inline bool check_pcp_refill(struct page *page) { return check_new_page(page); } -static bool check_new_pcp(struct page *page) +static inline bool check_new_pcp(struct page *page) { - return false; + if (debug_pagealloc_enabled()) + return check_new_page(page); + else + return false; } #endif /* CONFIG_DEBUG_VM */ @@ -2074,13 +2150,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order, static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, unsigned int alloc_flags) { - int i; - post_alloc_hook(page, order, gfp_flags); - if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO)) - for (i = 0; i < (1 << order); i++) - clear_highpage(page + i); + if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) + kernel_init_free_pages(page, 1 << order); if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); @@ -4029,7 +4102,6 @@ static int __perform_reclaim(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac) { - struct reclaim_state reclaim_state; int progress; unsigned int noreclaim_flag; unsigned long pflags; @@ -4041,13 +4113,10 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, psi_memstall_enter(&pflags); fs_reclaim_acquire(gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); - reclaim_state.reclaimed_slab = 0; - current->reclaim_state = &reclaim_state; progress = try_to_free_pages(ac->zonelist, order, gfp_mask, ac->nodemask); - current->reclaim_state = NULL; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(gfp_mask); psi_memstall_leave(&pflags); @@ -5852,6 +5921,7 @@ void __ref memmap_init_zone_device(struct zone *zone, { unsigned long pfn, end_pfn = start_pfn + size; struct pglist_data *pgdat = zone->zone_pgdat; + struct vmem_altmap *altmap = pgmap_altmap(pgmap); unsigned long zone_idx = zone_idx(zone); unsigned long start = jiffies; int nid = pgdat->node_id; @@ -5864,9 +5934,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * of the pages reserved for the memmap, so we can just jump to * the end of that region and start processing the device pages. */ - if (pgmap->altmap_valid) { - struct vmem_altmap *altmap = &pgmap->altmap; - + if (altmap) { start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap); size = end_pfn - start_pfn; } @@ -5886,12 +5954,12 @@ void __ref memmap_init_zone_device(struct zone *zone, __SetPageReserved(page); /* - * ZONE_DEVICE pages union ->lru with a ->pgmap back - * pointer and hmm_data. It is a bug if a ZONE_DEVICE - * page is ever freed or placed on a driver-private list. + * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer + * and zone_device_data. It is a bug if a ZONE_DEVICE page is + * ever freed or placed on a driver-private list. */ page->pgmap = pgmap; - page->hmm_data = 0; + page->zone_device_data = NULL; /* * Mark the block movable so that blocks are reserved for @@ -7518,10 +7586,28 @@ static int page_alloc_cpu_dead(unsigned int cpu) return 0; } +#ifdef CONFIG_NUMA +int hashdist = HASHDIST_DEFAULT; + +static int __init set_hashdist(char *str) +{ + if (!str) + return 0; + hashdist = simple_strtoul(str, &str, 0); + return 1; +} +__setup("hashdist=", set_hashdist); +#endif + void __init page_alloc_init(void) { int ret; +#ifdef CONFIG_NUMA + if (num_node_state(N_MEMORY) == 1) + hashdist = 0; +#endif + ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD, "mm/page_alloc:dead", NULL, page_alloc_cpu_dead); @@ -7906,19 +7992,6 @@ out: return ret; } -#ifdef CONFIG_NUMA -int hashdist = HASHDIST_DEFAULT; - -static int __init set_hashdist(char *str) -{ - if (!str) - return 0; - hashdist = simple_strtoul(str, &str, 0); - return 1; -} -__setup("hashdist=", set_hashdist); -#endif - #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES /* * Returns the number of pages that arch has reserved but @@ -7965,6 +8038,7 @@ void *__init alloc_large_system_hash(const char *tablename, unsigned long log2qty, size; void *table = NULL; gfp_t gfp_flags; + bool virt; /* allow the kernel cmdline to have a say */ if (!numentries) { @@ -8021,6 +8095,7 @@ void *__init alloc_large_system_hash(const char *tablename, gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; do { + virt = false; size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) @@ -8028,26 +8103,26 @@ void *__init alloc_large_system_hash(const char *tablename, else table = memblock_alloc_raw(size, SMP_CACHE_BYTES); - } else if (hashdist) { + } else if (get_order(size) >= MAX_ORDER || hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); + virt = true; } else { /* * If bucketsize is not a power-of-two, we may free * some pages at the end of hash table which * alloc_pages_exact() automatically does */ - if (get_order(size) < MAX_ORDER) { - table = alloc_pages_exact(size, gfp_flags); - kmemleak_alloc(table, size, 1, gfp_flags); - } + table = alloc_pages_exact(size, gfp_flags); + kmemleak_alloc(table, size, 1, gfp_flags); } } while (!table && size > PAGE_SIZE && --log2qty); if (!table) panic("Failed to allocate %s hash table\n", tablename); - pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n", - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size); + pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", + tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, + virt ? "vmalloc" : "linear"); if (_hash_shift) *_hash_shift = log2qty; |