From b4a0215e11dcfe23a48c65c6d6c82c0c2c551a48 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Sat, 27 Aug 2022 19:19:59 +0800
Subject: mm: fix null-ptr-deref in kswapd_is_running()

kswapd_run/stop() will set pgdat->kswapd to NULL, which could race with
kswapd_is_running() in kcompactd():

kswapd_run/stop()                       kcompactd()
                                          kswapd_is_running()
  pgdat->kswapd // error or normal ptr
                                          verify pgdat->kswapd // load non-NULL pgdat->kswapd
  pgdat->kswapd = NULL
                                          task_is_running(pgdat->kswapd)
                                          // Null pointer dereference

KASAN reports the null-ptr-deref shown below,

  vmscan: Failed to start kswapd on node 0
  ...
  BUG: KASAN: null-ptr-deref in kcompactd+0x440/0x504
  Read of size 8 at addr 0000000000000024 by task kcompactd0/37

  CPU: 0 PID: 37 Comm: kcompactd0 Kdump: loaded Tainted: G OE 5.10.60 #1
  Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
  Call trace:
   dump_backtrace+0x0/0x394
   show_stack+0x34/0x4c
   dump_stack+0x158/0x1e4
   __kasan_report+0x138/0x140
   kasan_report+0x44/0xdc
   __asan_load8+0x94/0xd0
   kcompactd+0x440/0x504
   kthread+0x1a4/0x1f0
   ret_from_fork+0x10/0x18

At present, kswapd/kcompactd_run() and kswapd/kcompactd_stop() are
protected by mem_hotplug_begin/done(), but kcompactd() itself is not.
There is no need to involve the memory hotplug lock in kcompactd(), so
add a new mutex to protect pgdat->kswapd accesses. Also, because the
kcompactd task checks the state of the kswapd task, it is better to call
kcompactd_stop() before kswapd_stop() to reduce lock contention.

[akpm@linux-foundation.org: add comments]
Link: https://lkml.kernel.org/r/20220827111959.186838-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Cc: David Hildenbrand
Cc: Muchun Song
Signed-off-by: Andrew Morton
---
 mm/compaction.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'mm/compaction.c')

diff --git a/mm/compaction.c b/mm/compaction.c
index 640fa76228dd..262c4676b32c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1981,9 +1981,21 @@ static inline bool is_via_compact_memory(int order)
 	return order == -1;
 }
 
+/*
+ * Determine whether kswapd is (or recently was!) running on this node.
+ *
+ * pgdat_kswapd_lock() pins pgdat->kswapd, so a concurrent kswapd_stop() can't
+ * zero it.
+ */
 static bool kswapd_is_running(pg_data_t *pgdat)
 {
-	return pgdat->kswapd && task_is_running(pgdat->kswapd);
+	bool running;
+
+	pgdat_kswapd_lock(pgdat);
+	running = pgdat->kswapd && task_is_running(pgdat->kswapd);
+	pgdat_kswapd_unlock(pgdat);
+
+	return running;
 }
 
 /*
--
cgit v1.2.3
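The hunk above shows only the reader side, because this view is limited to
mm/compaction.c. To actually close the race described in the changelog, the
writer side has to clear pgdat->kswapd under the same mutex. Below is a rough
sketch of that side for illustration only: pgdat_kswapd_lock()/pgdat_kswapd_unlock()
are the helpers used in the hunk above, while the exact body of kswapd_stop()
shown here is an assumption, not part of the patch in this view.

/*
 * Sketch only (not from the patch above): the writer must clear
 * pgdat->kswapd while holding the same per-node lock the readers take.
 */
void kswapd_stop(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	struct task_struct *kswapd;

	pgdat_kswapd_lock(pgdat);
	kswapd = pgdat->kswapd;
	if (kswapd) {
		kthread_stop(kswapd);
		pgdat->kswapd = NULL;	/* safe: readers hold the same lock */
	}
	pgdat_kswapd_unlock(pgdat);
}

With both sides serialized on the per-node mutex, kswapd_is_running() either
sees a still-valid task pointer or sees NULL; it can no longer pass the NULL
check and then have the pointer cleared before task_is_running() dereferences it.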
From 4f9bc69ac5ce34071a9a51343bc81ca76cb2e3f1 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Wed, 7 Sep 2022 14:08:42 +0800
Subject: mm: reuse pageblock_start/end_pfn() macro

Move pageblock_start_pfn()/pageblock_end_pfn() into pageblock-flags.h so
they can be used elsewhere, not only in compaction. Also use ALIGN_DOWN()
instead of round_down() to pair with ALIGN(); the two behave the same here
since pageblock_nr_pages is a power of two.

Link: https://lkml.kernel.org/r/20220907060844.126891-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Acked-by: Mike Rapoport
Reviewed-by: David Hildenbrand
Cc: Oscar Salvador
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 include/linux/pageblock-flags.h |  2 ++
 mm/compaction.c                 |  2 --
 mm/memblock.c                   |  2 +-
 mm/page_alloc.c                 | 13 ++++++-------
 mm/page_isolation.c             | 11 +++++------
 mm/page_owner.c                 |  4 ++--
 6 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 83c7248053a1..a09b7fe6bbf8 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -53,6 +53,8 @@ extern unsigned int pageblock_order;
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #define pageblock_nr_pages	(1UL << pageblock_order)
+#define pageblock_start_pfn(pfn)	ALIGN_DOWN((pfn), pageblock_nr_pages)
+#define pageblock_end_pfn(pfn)		ALIGN((pfn) + 1, pageblock_nr_pages)
 
 /* Forward declaration */
 struct page;
diff --git a/mm/compaction.c b/mm/compaction.c
index 262c4676b32c..9cbe8562b63a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -52,8 +52,6 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 
 #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
 #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
-#define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
-#define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
 
 /*
  * Page order with-respect-to which proactive compaction
diff --git a/mm/memblock.c b/mm/memblock.c
index b5d3026979fc..46fe7575f03c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2000,7 +2000,7 @@ static void __init free_unused_memmap(void)
 		 * presume that there are no holes in the memory map inside
 		 * a pageblock
 		 */
-		start = round_down(start, pageblock_nr_pages);
+		start = pageblock_start_pfn(start);
 
 		/*
 		 * If we had a previous bank, and there is a space
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44f3c9364316..1637db90472e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -544,7 +544,7 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
 #ifdef CONFIG_SPARSEMEM
 	pfn &= (PAGES_PER_SECTION-1);
 #else
-	pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
+	pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn);
 #endif /* CONFIG_SPARSEMEM */
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 }
@@ -1857,7 +1857,7 @@ void set_zone_contiguous(struct zone *zone)
 	unsigned long block_start_pfn = zone->zone_start_pfn;
 	unsigned long block_end_pfn;
 
-	block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+	block_end_pfn = pageblock_end_pfn(block_start_pfn);
 	for (; block_start_pfn < zone_end_pfn(zone);
 			block_start_pfn = block_end_pfn,
 			 block_end_pfn += pageblock_nr_pages) {
@@ -2653,8 +2653,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
 		*num_movable = 0;
 
 	pfn = page_to_pfn(page);
-	start_pfn = pfn & ~(pageblock_nr_pages - 1);
-	end_pfn = start_pfn + pageblock_nr_pages - 1;
+	start_pfn = pageblock_start_pfn(pfn);
+	end_pfn = pageblock_end_pfn(pfn) - 1;
 
 	/* Do not cross zone boundaries */
 	if (!zone_spans_pfn(zone, start_pfn))
@@ -6934,9 +6934,8 @@ static void __init init_unavailable_range(unsigned long spfn,
 	u64 pgcnt = 0;
 
 	for (pfn = spfn; pfn < epfn; pfn++) {
-		if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
-			pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
-			      + pageblock_nr_pages - 1;
+		if (!pfn_valid(pageblock_start_pfn(pfn))) {
+			pfn = pageblock_end_pfn(pfn) - 1;
 			continue;
 		}
 		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index eb3a68ca92ad..5819cb9c62f3 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -37,8 +37,8 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
 	struct zone *zone = page_zone(page);
 	unsigned long pfn;
 
-	VM_BUG_ON(ALIGN_DOWN(start_pfn, pageblock_nr_pages) !=
-		  ALIGN_DOWN(end_pfn - 1, pageblock_nr_pages));
+	VM_BUG_ON(pageblock_start_pfn(start_pfn) !=
+		  pageblock_start_pfn(end_pfn - 1));
 
 	if (is_migrate_cma_page(page)) {
 		/*
@@ -172,7 +172,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
 	 * to avoid redundant checks.
 	 */
 	check_unmovable_start = max(page_to_pfn(page), start_pfn);
-	check_unmovable_end = min(ALIGN(page_to_pfn(page) + 1, pageblock_nr_pages),
+	check_unmovable_end = min(pageblock_end_pfn(page_to_pfn(page)),
 				  end_pfn);
 
 	unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
@@ -532,7 +532,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 	unsigned long pfn;
 	struct page *page;
 	/* isolation is done at page block granularity */
-	unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
+	unsigned long isolate_start = pageblock_start_pfn(start_pfn);
 	unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
 	int ret;
 	bool skip_isolation = false;
@@ -579,10 +579,9 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 {
 	unsigned long pfn;
 	struct page *page;
-	unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
+	unsigned long isolate_start = pageblock_start_pfn(start_pfn);
 	unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
 
-
 	for (pfn = isolate_start;
 	     pfn < isolate_end;
 	     pfn += pageblock_nr_pages) {
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 54f3e039fb48..2d27f532df4c 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -297,7 +297,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 			continue;
 		}
 
-		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+		block_end_pfn = pageblock_end_pfn(pfn);
 		block_end_pfn = min(block_end_pfn, end_pfn);
 
 		pageblock_mt = get_pageblock_migratetype(page);
@@ -635,7 +635,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
 			continue;
 		}
 
-		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+		block_end_pfn = pageblock_end_pfn(pfn);
 		block_end_pfn = min(block_end_pfn, end_pfn);
 
 		for (; pfn < block_end_pfn; pfn++) {
--
cgit v1.2.3
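For reference, the relocated macros describe a pageblock as the half-open pfn
range [pageblock_start_pfn(pfn), pageblock_end_pfn(pfn)): the first rounds down
to the start of the block containing pfn, the second yields the first pfn of the
next block. The snippet below is a standalone userspace illustration, not kernel
code; ALIGN()/ALIGN_DOWN() are written out in their simplified power-of-two form,
and pageblock_order = 9 (512 pages, i.e. 2 MiB blocks with 4 KiB pages) is just
an example value.

#include <stdio.h>

/* Simplified userspace equivalents of the kernel macros, power-of-two 'a' only. */
#define ALIGN(x, a)		(((x) + ((a) - 1)) & ~((a) - 1))
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

#define pageblock_order		9	/* example value */
#define pageblock_nr_pages	(1UL << pageblock_order)
#define pageblock_start_pfn(pfn)	ALIGN_DOWN((pfn), pageblock_nr_pages)
#define pageblock_end_pfn(pfn)		ALIGN((pfn) + 1, pageblock_nr_pages)

int main(void)
{
	unsigned long pfn = 0x12345;

	/* Prints start=0x12200 end=0x12400: pfn 0x12345 lies in that pageblock. */
	printf("start=%#lx end=%#lx\n",
	       pageblock_start_pfn(pfn), pageblock_end_pfn(pfn));
	return 0;
}

Because pageblock_nr_pages is a power of two, ALIGN_DOWN() and round_down()
expand to the same mask operation, so switching to ALIGN_DOWN() only makes the
pair symmetric with ALIGN().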
From ee0913c4719610204315a0d8a35122c6233249e0 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Wed, 7 Sep 2022 14:08:44 +0800
Subject: mm: add pageblock_aligned() macro

Add pageblock_aligned() and use it to simplify code.

Link: https://lkml.kernel.org/r/20220907060844.126891-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Acked-by: Mike Rapoport
Cc: David Hildenbrand
Cc: Oscar Salvador
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 include/linux/pageblock-flags.h |  1 +
 mm/compaction.c                 |  8 ++++----
 mm/memory_hotplug.c             |  6 ++----
 mm/page_alloc.c                 | 17 +++++++----------
 mm/page_isolation.c             |  2 +-
 5 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'mm/compaction.c')

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 293c76630fa8..5f1ae07d724b 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,6 +54,7 @@ extern unsigned int pageblock_order;
 
 #define pageblock_nr_pages	(1UL << pageblock_order)
 #define pageblock_align(pfn)	ALIGN((pfn), pageblock_nr_pages)
+#define pageblock_aligned(pfn)	IS_ALIGNED((pfn), pageblock_nr_pages)
 #define pageblock_start_pfn(pfn)	ALIGN_DOWN((pfn), pageblock_nr_pages)
 #define pageblock_end_pfn(pfn)	ALIGN((pfn) + 1, pageblock_nr_pages)
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 9cbe8562b63a..e2a9615f5fde 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -402,7 +402,7 @@ static bool test_and_set_skip(struct compact_control *cc, struct page *page,
 	if (cc->ignore_skip_hint)
 		return false;
 
-	if (!IS_ALIGNED(pfn, pageblock_nr_pages))
+	if (!pageblock_aligned(pfn))
 		return false;
 
 	skip = get_pageblock_skip(page);
@@ -884,7 +884,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 * COMPACT_CLUSTER_MAX at a time so the second call must
 		 * not falsely conclude that the block should be skipped.
 		 */
-		if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
+		if (!valid_page && pageblock_aligned(low_pfn)) {
 			if (!isolation_suitable(cc, page)) {
 				low_pfn = end_pfn;
 				page = NULL;
@@ -1937,7 +1937,7 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 		 * before making it "skip" so other compaction instances do
 		 * not scan the same block.
 		 */
-		if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
+		if (pageblock_aligned(low_pfn) &&
 		    !fast_find_block && !isolation_suitable(cc, page))
 			continue;
 
@@ -2123,7 +2123,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 	 * migration source is unmovable/reclaimable but it's not worth
 	 * special casing.
 	 */
-	if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
+	if (!pageblock_aligned(cc->migrate_pfn))
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9ae1f98548b1..fd40f7e9f176 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1085,8 +1085,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 	 * of the physical memory space for vmemmaps. That space is pageblock
 	 * aligned.
 	 */
-	if (WARN_ON_ONCE(!nr_pages ||
-			 !IS_ALIGNED(pfn, pageblock_nr_pages) ||
+	if (WARN_ON_ONCE(!nr_pages || !pageblock_aligned(pfn) ||
 			 !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
@@ -1806,8 +1805,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 	 * of the physical memory space for vmemmaps. That space is pageblock
 	 * aligned.
 	 */
-	if (WARN_ON_ONCE(!nr_pages ||
-			 !IS_ALIGNED(start_pfn, pageblock_nr_pages) ||
+	if (WARN_ON_ONCE(!nr_pages || !pageblock_aligned(start_pfn) ||
 			 !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1637db90472e..0002ded4ab0e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1892,15 +1892,14 @@ static void __init deferred_free_range(unsigned long pfn,
 	page = pfn_to_page(pfn);
 
 	/* Free a large naturally-aligned chunk if possible */
-	if (nr_pages == pageblock_nr_pages &&
-	    (pfn & (pageblock_nr_pages - 1)) == 0) {
+	if (nr_pages == pageblock_nr_pages && pageblock_aligned(pfn)) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		__free_pages_core(page, pageblock_order);
 		return;
 	}
 
 	for (i = 0; i < nr_pages; i++, page++, pfn++) {
-		if ((pfn & (pageblock_nr_pages - 1)) == 0)
+		if (pageblock_aligned(pfn))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		__free_pages_core(page, 0);
 	}
@@ -1928,7 +1927,7 @@ static inline void __init pgdat_init_report_one_done(void)
  */
 static inline bool __init deferred_pfn_valid(unsigned long pfn)
 {
-	if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+	if (pageblock_aligned(pfn) && !pfn_valid(pfn))
 		return false;
 	return true;
 }
@@ -1940,14 +1939,13 @@ static inline bool __init deferred_pfn_valid(unsigned long pfn)
 static void __init deferred_free_pages(unsigned long pfn,
 				       unsigned long end_pfn)
 {
-	unsigned long nr_pgmask = pageblock_nr_pages - 1;
 	unsigned long nr_free = 0;
 
 	for (; pfn < end_pfn; pfn++) {
 		if (!deferred_pfn_valid(pfn)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 0;
-		} else if (!(pfn & nr_pgmask)) {
+		} else if (pageblock_aligned(pfn)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 1;
 		} else {
@@ -1967,7 +1965,6 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 						 unsigned long pfn,
 						 unsigned long end_pfn)
 {
-	unsigned long nr_pgmask = pageblock_nr_pages - 1;
 	int nid = zone_to_nid(zone);
 	unsigned long nr_pages = 0;
 	int zid = zone_idx(zone);
@@ -1977,7 +1974,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 		if (!deferred_pfn_valid(pfn)) {
 			page = NULL;
 			continue;
-		} else if (!page || !(pfn & nr_pgmask)) {
+		} else if (!page || pageblock_aligned(pfn)) {
 			page = pfn_to_page(pfn);
 		} else {
 			page++;
@@ -6759,7 +6756,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
 		 * such that unmovable allocations won't be scattered all
 		 * over the place during system boot.
 		 */
-		if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+		if (pageblock_aligned(pfn)) {
 			set_pageblock_migratetype(page, migratetype);
 			cond_resched();
 		}
@@ -6802,7 +6799,7 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 	 * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
 	 * because this is done early in section_activate()
 	 */
-	if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+	if (pageblock_aligned(pfn)) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		cond_resched();
 	}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index fa82faa07daf..04141a9bea70 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -312,7 +312,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 	struct zone *zone;
 	int ret;
 
-	VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
+	VM_BUG_ON(!pageblock_aligned(boundary_pfn));
 
 	if (isolate_before)
 		isolate_pageblock = boundary_pfn - pageblock_nr_pages;
--
cgit v1.2.3
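pageblock_aligned(pfn) is purely a readability helper: IS_ALIGNED(pfn,
pageblock_nr_pages) performs the same test as the open-coded
(pfn & (pageblock_nr_pages - 1)) == 0 pattern the patch removes, which is also
why the local nr_pgmask variables in the deferred-init paths become unnecessary.
Below is a small standalone userspace check of that equivalence, not kernel
code; IS_ALIGNED() is re-implemented in its simplified power-of-two form and
pageblock_order = 9 is just an example value.

#include <assert.h>
#include <stdio.h>

/* Simplified userspace stand-in for the kernel's IS_ALIGNED(), power-of-two 'a' only. */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

#define pageblock_order		9	/* example value */
#define pageblock_nr_pages	(1UL << pageblock_order)
#define pageblock_aligned(pfn)	IS_ALIGNED((pfn), pageblock_nr_pages)

int main(void)
{
	unsigned long pfn;

	for (pfn = 0; pfn < 4 * pageblock_nr_pages; pfn++) {
		/* The open-coded test used in the lines removed above. */
		int old_way = ((pfn & (pageblock_nr_pages - 1)) == 0);

		assert(pageblock_aligned(pfn) == old_way);
	}
	printf("pageblock_aligned() matches the open-coded mask test\n");
	return 0;
}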