Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 114
1 file changed, 56 insertions(+), 58 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97dc9ca..05fe1ddb033c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@
 #include <linux/nmi.h>
 #include <linux/psi.h>
 #include <linux/padata.h>
+#include <linux/khugepaged.h>

 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -155,16 +156,16 @@ static int __init early_init_on_alloc(char *buf)
        int ret;
        bool bool_result;

-       if (!buf)
-               return -EINVAL;
        ret = kstrtobool(buf, &bool_result);
+       if (ret)
+               return ret;
        if (bool_result && page_poisoning_enabled())
                pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n");
        if (bool_result)
                static_branch_enable(&init_on_alloc);
        else
                static_branch_disable(&init_on_alloc);
-       return ret;
+       return 0;
 }
 early_param("init_on_alloc", early_init_on_alloc);

@@ -173,16 +174,16 @@ static int __init early_init_on_free(char *buf)
        int ret;
        bool bool_result;

-       if (!buf)
-               return -EINVAL;
        ret = kstrtobool(buf, &bool_result);
+       if (ret)
+               return ret;
        if (bool_result && page_poisoning_enabled())
                pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n");
        if (bool_result)
                static_branch_enable(&init_on_free);
        else
                static_branch_disable(&init_on_free);
-       return ret;
+       return 0;
 }
 early_param("init_on_free", early_init_on_free);

@@ -3367,9 +3368,16 @@ struct page *rmqueue(struct zone *preferred_zone,
        struct page *page;

        if (likely(order == 0)) {
-               page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
+               /*
+                * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
+                * we need to skip it when CMA area isn't allowed.
+                */
+               if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
+                               migratetype != MIGRATE_MOVABLE) {
+                       page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
                                        migratetype, alloc_flags);
-               goto out;
+                       goto out;
+               }
        }

        /*
@@ -3381,7 +3389,13 @@ struct page *rmqueue(struct zone *preferred_zone,

        do {
                page = NULL;
-               if (alloc_flags & ALLOC_HARDER) {
+               /*
+                * order-0 request can reach here when the pcplist is skipped
+                * due to non-CMA allocation context. HIGHATOMIC area is
+                * reserved for high-order atomic allocation, so order-0
+                * request should skip it.
+                */
+               if (order > 0 && alloc_flags & ALLOC_HARDER) {
                        page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
                        if (page)
                                trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3727,8 +3741,8 @@ retry:
         */
        no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
        z = ac->preferred_zoneref;
-       for_next_zone_zonelist_nodemask(zone, z, ac->zonelist,
-                                       ac->highest_zoneidx, ac->nodemask) {
+       for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx,
+                                       ac->nodemask) {
                struct page *page;
                unsigned long mark;

@@ -3972,8 +3986,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         * success so it is time to admit defeat. We will skip the OOM killer
         * because it is very likely that the caller has a more reasonable
         * fallback than shooting a random task.
+        *
+        * The OOM killer may not free memory on a specific node.
         */
-       if (gfp_mask & __GFP_RETRY_MAYFAIL)
+       if (gfp_mask & (__GFP_RETRY_MAYFAIL | __GFP_THISNODE))
                goto out;
        /* The OOM killer does not needlessly kill tasks for lowmem */
        if (ac->highest_zoneidx < ZONE_NORMAL)
@@ -3990,10 +4006,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         * failures more gracefully we should just bail out here.
         */

-       /* The OOM killer may not free memory on a specific node */
-       if (gfp_mask & __GFP_THISNODE)
-               goto out;
-
        /* Exhausted what can be done so it's blame time */
        if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
                *did_some_progress = 1;
@@ -4241,13 +4253,12 @@ EXPORT_SYMBOL_GPL(fs_reclaim_release);
 #endif

 /* Perform direct synchronous page reclaim */
-static int
+static unsigned long
 __perform_reclaim(gfp_t gfp_mask, unsigned int order,
                                        const struct alloc_context *ac)
 {
-       int progress;
        unsigned int noreclaim_flag;
-       unsigned long pflags;
+       unsigned long pflags, progress;

        cond_resched();

@@ -4826,12 +4837,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,

        *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);

-       return true;
-}
-
-/* Determine whether to spread dirty pages and what the first usable zone */
-static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
-{
        /* Dirty zone balancing only done in the fast path */
        ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);

@@ -4842,6 +4847,8 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
         */
        ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                        ac->highest_zoneidx, ac->nodemask);
+
+       return true;
 }

 /*
@@ -4870,8 +4877,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
        if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
                return NULL;

-       finalise_ac(gfp_mask, &ac);
-
        /*
         * Forbid the first pass from falling back to types that fragment
         * memory until all local zones are considered.
@@ -4947,6 +4952,9 @@ void __free_pages(struct page *page, unsigned int order)
 {
        if (put_page_testzero(page))
                free_the_page(page, order);
+       else if (!PageHead(page))
+               while (order-- > 0)
+                       free_the_page(page + (1 << order), order);
 }
 EXPORT_SYMBOL(__free_pages);

@@ -5637,7 +5645,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
        int n, val;
        int min_val = INT_MAX;
        int best_node = NUMA_NO_NODE;
-       const struct cpumask *tmp = cpumask_of_node(0);

        /* Use the local node if we haven't already */
        if (!node_isset(node, *used_node_mask)) {
@@ -5658,8 +5665,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
                val += (n < node);

                /* Give preference to headless and unused nodes */
-               tmp = cpumask_of_node(n);
-               if (!cpumask_empty(tmp))
+               if (!cpumask_empty(cpumask_of_node(n)))
                        val += PENALTY_FOR_NODE_WITH_CPUS;

                /* Slight preference for less loaded node */
@@ -5955,7 +5961,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)

        if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
                if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
-                       for_each_memblock(memory, r) {
+                       for_each_mem_region(r) {
                                if (*pfn < memblock_region_memory_end_pfn(r))
                                        break;
                        }
@@ -5975,7 +5981,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-               unsigned long start_pfn, enum memmap_context context,
+               unsigned long start_pfn, enum meminit_context context,
                struct vmem_altmap *altmap)
 {
        unsigned long pfn, end_pfn = start_pfn + size;
@@ -6007,7 +6013,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 * There can be holes in boot-time mem_map[]s handed to this
                 * function. They do not exist on hotplugged memory.
                 */
-               if (context == MEMMAP_EARLY) {
+               if (context == MEMINIT_EARLY) {
                        if (overlap_memmap_init(zone, &pfn))
                                continue;
                        if (defer_init(nid, pfn, end_pfn))
@@ -6016,7 +6022,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,

                page = pfn_to_page(pfn);
                __init_single_page(page, pfn, zone, nid);
-               if (context == MEMMAP_HOTPLUG)
+               if (context == MEMINIT_HOTPLUG)
                        __SetPageReserved(page);

                /*
@@ -6099,7 +6105,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
                 * check here not to call set_pageblock_migratetype() against
                 * pfn out of zone.
                 *
-                * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+                * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
                 * because this is done early in section_activate()
                 */
                if (!(pfn & (pageblock_nr_pages - 1))) {
@@ -6137,7 +6143,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
                if (end_pfn > start_pfn) {
                        size = end_pfn - start_pfn;
                        memmap_init_zone(size, nid, zone, start_pfn,
-                                        MEMMAP_EARLY, NULL);
+                                        MEMINIT_EARLY, NULL);
                }
        }
 }
@@ -6540,7 +6546,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
                unsigned long start_pfn, end_pfn;
                struct memblock_region *r;

-               for_each_memblock(memory, r) {
+               for_each_mem_region(r) {
                        start_pfn = clamp(memblock_region_memory_base_pfn(r),
                                          zone_start_pfn, zone_end_pfn);
                        end_pfn = clamp(memblock_region_memory_end_pfn(r),
@@ -6984,8 +6990,7 @@ static void __init init_unavailable_mem(void)
         * Loop through unavailable ranges not covered by memblock.memory.
         */
        pgcnt = 0;
-       for_each_mem_range(i, &memblock.memory, NULL,
-                          NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
+       for_each_mem_range(i, &start, &end) {
                if (next < start)
                        pgcnt += init_unavailable_range(PFN_DOWN(next),
                                                        PFN_UP(start));
@@ -7135,7 +7140,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
         * options.
         */
        if (movable_node_is_enabled()) {
-               for_each_memblock(memory, r) {
+               for_each_mem_region(r) {
                        if (!memblock_is_hotpluggable(r))
                                continue;

@@ -7156,7 +7161,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
        if (mirrored_kernelcore) {
                bool mem_below_4gb_not_mirrored = false;

-               for_each_memblock(memory, r) {
+               for_each_mem_region(r) {
                        if (memblock_is_mirror(r))
                                continue;

@@ -7891,6 +7896,8 @@ int __meminit init_per_zone_wmark_min(void)
        setup_min_slab_ratio();
 #endif

+       khugepaged_min_free_kbytes_update();
+
        return 0;
 }
 postcore_initcall(init_per_zone_wmark_min)
@@ -8218,14 +8225,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
 {
        unsigned long iter = 0;
        unsigned long pfn = page_to_pfn(page);
-
-       /*
-        * TODO we could make this much more efficient by not checking every
-        * page in the range if we know all of them are in MOVABLE_ZONE and
-        * that the movable zone guarantees that pages are migratable but
-        * the later is not the case right now unfortunatelly. E.g. movablecore
-        * can still lead to having bootmem allocations in zone_movable.
-        */
+       unsigned long offset = pfn % pageblock_nr_pages;

        if (is_migrate_cma_page(page)) {
                /*
@@ -8239,12 +8239,18 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
                        return page;
        }

-       for (; iter < pageblock_nr_pages; iter++) {
+       for (; iter < pageblock_nr_pages - offset; iter++) {
                if (!pfn_valid_within(pfn + iter))
                        continue;

                page = pfn_to_page(pfn + iter);

+               /*
+                * Both, bootmem allocations and memory holes are marked
+                * PG_reserved and are unmovable. We can even have unmovable
+                * allocations inside ZONE_MOVABLE, for example when
+                * specifying "movablecore".
+                */
                if (PageReserved(page))
                        return page;

@@ -8318,14 +8324,6 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
                 * it. But now, memory offline itself doesn't call
                 * shrink_node_slabs() and it still to be fixed.
                 */
-               /*
-                * If the page is not RAM, page_count()should be 0.
-                * we don't need more check. This is an _used_ not-movable page.
-                *
-                * The problematic thing here is PG_reserved pages. PG_reserved
-                * is set to both of a memory hole page and a _used_ kernel
-                * page at boot.
-                */
                return page;
        }
        return NULL;
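
Notes on selected hunks follow. The sample programs are standalone userspace sketches meant to illustrate the logic of the change; they are not kernel code, and any helper names in them are stand-ins.

On the early_init_on_alloc()/early_init_on_free() hunks (-155, -173): the explicit NULL check is dropped and the kstrtobool() return value is propagated, so a malformed "init_on_alloc=" or "init_on_free=" value now reports a parse error instead of being treated as handled; kstrtobool() itself rejects a NULL string, which is what made the removed check redundant. A minimal sketch of the same parse-then-apply flow, with parse_bool() standing in for kstrtobool():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for kstrtobool(): accepts a few spellings, rejects NULL or
 * anything unrecognised with -EINVAL. */
static int parse_bool(const char *s, bool *res)
{
        if (!s)
                return -EINVAL;
        if (!strcmp(s, "1") || !strcmp(s, "y") || !strcmp(s, "on")) {
                *res = true;
                return 0;
        }
        if (!strcmp(s, "0") || !strcmp(s, "n") || !strcmp(s, "off")) {
                *res = false;
                return 0;
        }
        return -EINVAL;
}

/* Same control flow as the reworked early_init_on_alloc(): parse first,
 * bail out on error, only then flip the switch and report success. */
static int early_init_on_alloc(const char *buf, bool *enabled)
{
        bool bool_result;
        int ret;

        ret = parse_bool(buf, &bool_result);
        if (ret)
                return ret;
        *enabled = bool_result;
        return 0;
}

int main(void)
{
        bool on = false;

        printf("\"on\"    -> %d (enabled=%d)\n", early_init_on_alloc("on", &on), on);
        printf("\"bogus\" -> %d (enabled=%d)\n", early_init_on_alloc("bogus", &on), on);
        printf("NULL    -> %d (enabled=%d)\n", early_init_on_alloc(NULL, &on), on);
        return 0;
}

The old code returned the kstrtobool() error, but only after acting on a bool_result that a failed parse may have left unset; returning early keeps the static branches untouched on bad input.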
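On the rmqueue() hunk at -3367: an order-0 request normally comes straight from the per-cpu lists, but the MIGRATE_MOVABLE pcplist may contain CMA pages, so a request that is not allowed to use CMA (no ALLOC_CMA) now skips the fast path and falls through to the buddy lists, where the restriction can be enforced; the companion hunk at -3381 then keeps such order-0 fallthroughs away from the MIGRATE_HIGHATOMIC reserve. The gate reads as a three-way OR; a tiny sketch with made-up flag values (not the kernel's definitions):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins only, not the kernel's actual values. */
#define ALLOC_CMA 0x1
enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE };
#define IS_ENABLED_CONFIG_CMA true

/* Mirrors the new gate in rmqueue(): the order-0 pcplist fast path is only
 * taken when CMA is compiled out, CMA pages are allowed, or the request is
 * not MIGRATE_MOVABLE (whose pcplist may hold CMA pages). */
static bool can_use_pcplist(unsigned int alloc_flags, int migratetype)
{
        return !IS_ENABLED_CONFIG_CMA ||
               (alloc_flags & ALLOC_CMA) ||
               migratetype != MIGRATE_MOVABLE;
}

int main(void)
{
        printf("movable,   !ALLOC_CMA -> %d\n", can_use_pcplist(0, MIGRATE_MOVABLE));
        printf("movable,    ALLOC_CMA -> %d\n", can_use_pcplist(ALLOC_CMA, MIGRATE_MOVABLE));
        printf("unmovable, !ALLOC_CMA -> %d\n", can_use_pcplist(0, MIGRATE_UNMOVABLE));
        return 0;
}

Only the first case, a movable request that must avoid CMA, bypasses the per-cpu lists.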
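On the __free_pages() hunk at -4947: if put_page_testzero() does not drop the last reference and the page is not compound (no PageHead), the leftover reference can only pin the head page, so the new loop returns the tail sub-blocks to the allocator, freeing the upper half at order-1, then the upper half of what remains at order-2, and so on down to a single page. A standalone sketch of just that loop arithmetic, with free_the_page() reduced to a printout:

#include <stdio.h>

/* Stand-in for the kernel's free_the_page(): report which block would go
 * back to the buddy allocator. */
static void free_the_page(unsigned long pfn, unsigned int order)
{
        printf("free pfn %lu-%lu (order %u)\n",
               pfn, pfn + (1UL << order) - 1, order);
}

int main(void)
{
        unsigned long pfn = 0;  /* head page of the allocation */
        unsigned int order = 3; /* 8 pages; only page 0 stays pinned */

        /* Mirrors the new tail of __free_pages(): free pages 1..2^order-1
         * while the caller's remaining reference keeps only page 0 alive. */
        while (order-- > 0)
                free_the_page(pfn + (1UL << order), order);
        return 0;
}

For order 3 this frees pfn 4-7, 2-3 and 1, i.e. everything except the pinned head page. Compound pages are excluded by the PageHead() test because their tail pages belong to the head and are freed as a unit.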
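On the has_unmovable_pages() hunks at -8218/-8239: the scan used to run for a fixed pageblock_nr_pages iterations starting at pfn, so an unaligned starting page made it wander into the following pageblock; with offset = pfn % pageblock_nr_pages the loop now stops at the pageblock boundary. A short sketch of the range arithmetic (pageblock_nr_pages fixed at 512 purely for illustration; the real value is configuration-dependent):

#include <stdio.h>

#define pageblock_nr_pages 512UL        /* illustrative value only */

int main(void)
{
        unsigned long pfn = 1000;       /* unaligned start within a pageblock */
        unsigned long offset = pfn % pageblock_nr_pages;
        unsigned long iter;

        /* Old bound: iter < pageblock_nr_pages          -> scans 1000-1511,
         *            crossing into the next pageblock at pfn 1024.
         * New bound: iter < pageblock_nr_pages - offset -> scans 1000-1023,
         *            stopping at the pageblock boundary. */
        for (iter = 0; iter < pageblock_nr_pages - offset; iter++)
                ;
        printf("offset %lu, scanned %lu pages: %lu-%lu\n",
               offset, iter, pfn, pfn + iter - 1);
        return 0;
}

The same hunk also moves the explanation of PG_reserved pages (bootmem allocations and memory holes, possibly inside ZONE_MOVABLE) next to the PageReserved() check it documents, replacing the stale TODO comment removed at -8218.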