diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 2 | ||||
-rw-r--r-- | mm/Makefile | 3 | ||||
-rw-r--r-- | mm/compaction.c | 418 | ||||
-rw-r--r-- | mm/internal.h | 33 | ||||
-rw-r--r-- | mm/memory-failure.c | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 409 | ||||
-rw-r--r-- | mm/page_isolation.c | 15 | ||||
-rw-r--r-- | mm/vmstat.c | 3 |
9 files changed, 675 insertions, 216 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index e338407f1225..39220026c797 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -198,7 +198,7 @@ config COMPACTION config MIGRATION bool "Page migration" def_bool y - depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION + depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA help Allows the migration of the physical location of pages of processes while the virtual addresses are not changed. This is useful in diff --git a/mm/Makefile b/mm/Makefile index 50ec00ef2a0e..8aada89efbbb 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o mmu_context.o percpu.o \ - $(mmu-y) + compaction.o $(mmu-y) obj-y += init-mm.o ifdef CONFIG_NO_BOOTMEM @@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_SLOB) += slob.o -obj-$(CONFIG_COMPACTION) += compaction.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o diff --git a/mm/compaction.c b/mm/compaction.c index 74a8c825ff28..da7d35ea5103 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -16,30 +16,11 @@ #include <linux/sysfs.h> #include "internal.h" +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + #define CREATE_TRACE_POINTS #include <trace/events/compaction.h> -/* - * compact_control is used to track pages being migrated and the free pages - * they are being migrated to during memory compaction. The free_pfn starts - * at the end of a zone and migrate_pfn begins at the start. Movable pages - * are moved to the end of a zone during a compaction run and the run - * completes when free_pfn <= migrate_pfn - */ -struct compact_control { - struct list_head freepages; /* List of free pages to migrate to */ - struct list_head migratepages; /* List of pages being migrated */ - unsigned long nr_freepages; /* Number of isolated free pages */ - unsigned long nr_migratepages; /* Number of pages to migrate */ - unsigned long free_pfn; /* isolate_freepages search base */ - unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ - - int order; /* order a direct compactor needs */ - int migratetype; /* MOVABLE, RECLAIMABLE etc */ - struct zone *zone; -}; - static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -54,24 +35,35 @@ static unsigned long release_freepages(struct list_head *freelist) return count; } -/* Isolate free pages onto a private freelist. Must hold zone->lock */ -static unsigned long isolate_freepages_block(struct zone *zone, - unsigned long blockpfn, - struct list_head *freelist) +static void map_pages(struct list_head *list) +{ + struct page *page; + + list_for_each_entry(page, list, lru) { + arch_alloc_page(page, 0); + kernel_map_pages(page, 1, 1); + } +} + +static inline bool migrate_async_suitable(int migratetype) +{ + return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; +} + +/* + * Isolate free pages onto a private freelist. Caller must hold zone->lock. + * If @strict is true, will abort returning 0 on any invalid PFNs or non-free + * pages inside of the pageblock (even though it may still end up isolating + * some pages). + */ +static unsigned long isolate_freepages_block(unsigned long blockpfn, + unsigned long end_pfn, + struct list_head *freelist, + bool strict) { - unsigned long zone_end_pfn, end_pfn; int nr_scanned = 0, total_isolated = 0; struct page *cursor; - /* Get the last PFN we should scan for free pages at */ - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); - - /* Find the first usable PFN in the block to initialse page cursor */ - for (; blockpfn < end_pfn; blockpfn++) { - if (pfn_valid_within(blockpfn)) - break; - } cursor = pfn_to_page(blockpfn); /* Isolate free pages. This assumes the block is valid */ @@ -79,15 +71,23 @@ static unsigned long isolate_freepages_block(struct zone *zone, int isolated, i; struct page *page = cursor; - if (!pfn_valid_within(blockpfn)) + if (!pfn_valid_within(blockpfn)) { + if (strict) + return 0; continue; + } nr_scanned++; - if (!PageBuddy(page)) + if (!PageBuddy(page)) { + if (strict) + return 0; continue; + } /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); + if (!isolated && strict) + return 0; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -105,114 +105,71 @@ static unsigned long isolate_freepages_block(struct zone *zone, return total_isolated; } -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) -{ - - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return false; - - /* If the page is a large free page, then allow migration */ - if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; - - /* If the block is MIGRATE_MOVABLE, allow migration */ - if (migratetype == MIGRATE_MOVABLE) - return true; - - /* Otherwise skip the block */ - return false; -} - -/* - * Based on information in the current compact_control, find blocks - * suitable for isolating free pages from and then isolate them. +/** + * isolate_freepages_range() - isolate free pages. + * @start_pfn: The first PFN to start isolating. + * @end_pfn: The one-past-last PFN. + * + * Non-free pages, invalid PFNs, or zone boundaries within the + * [start_pfn, end_pfn) range are considered errors, cause function to + * undo its actions and return zero. + * + * Otherwise, function returns one-past-the-last PFN of isolated page + * (which may be greater then end_pfn if end fell in a middle of + * a free page). */ -static void isolate_freepages(struct zone *zone, - struct compact_control *cc) +unsigned long +isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) { - struct page *page; - unsigned long high_pfn, low_pfn, pfn; - unsigned long flags; - int nr_freepages = cc->nr_freepages; - struct list_head *freelist = &cc->freepages; - - /* - * Initialise the free scanner. The starting point is where we last - * scanned from (or the end of the zone if starting). The low point - * is the end of the pageblock the migration scanner is using. - */ - pfn = cc->free_pfn; - low_pfn = cc->migrate_pfn + pageblock_nr_pages; + unsigned long isolated, pfn, block_end_pfn, flags; + struct zone *zone = NULL; + LIST_HEAD(freelist); - /* - * Take care that if the migration scanner is at the end of the zone - * that the free scanner does not accidentally move to the next zone - * in the next isolation cycle. - */ - high_pfn = min(low_pfn, pfn); - - /* - * Isolate free pages until enough are available to migrate the - * pages on cc->migratepages. We stop searching if the migrate - * and free page scanners meet or enough free pages are isolated. - */ - for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; - pfn -= pageblock_nr_pages) { - unsigned long isolated; + if (pfn_valid(start_pfn)) + zone = page_zone(pfn_to_page(start_pfn)); - if (!pfn_valid(pfn)) - continue; + for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { + if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) + break; /* - * Check for overlapping nodes/zones. It's possible on some - * configurations to have a setup like - * node0 node1 node0 - * i.e. it's possible that all pages within a zones range of - * pages do not belong to a single zone. + * On subsequent iterations ALIGN() is actually not needed, + * but we keep it that we not to complicate the code. */ - page = pfn_to_page(pfn); - if (page_zone(page) != zone) - continue; + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = min(block_end_pfn, end_pfn); - /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) - continue; + spin_lock_irqsave(&zone->lock, flags); + isolated = isolate_freepages_block(pfn, block_end_pfn, + &freelist, true); + spin_unlock_irqrestore(&zone->lock, flags); /* - * Found a block suitable for isolating free pages from. Now - * we disabled interrupts, double check things are ok and - * isolate the pages. This is to minimise the time IRQs - * are disabled + * In strict mode, isolate_freepages_block() returns 0 if + * there are any holes in the block (ie. invalid PFNs or + * non-free pages). */ - isolated = 0; - spin_lock_irqsave(&zone->lock, flags); - if (suitable_migration_target(page)) { - isolated = isolate_freepages_block(zone, pfn, freelist); - nr_freepages += isolated; - } - spin_unlock_irqrestore(&zone->lock, flags); + if (!isolated) + break; /* - * Record the highest PFN we isolated pages from. When next - * looking for free pages, the search will restart here as - * page migration may have returned some pages to the allocator + * If we managed to isolate pages, it is always (1 << n) * + * pageblock_nr_pages for some non-negative n. (Max order + * page may span two pageblocks). */ - if (isolated) - high_pfn = max(high_pfn, pfn); } /* split_free_page does not map the pages */ - list_for_each_entry(page, freelist, lru) { - arch_alloc_page(page, 0); - kernel_map_pages(page, 1, 1); + map_pages(&freelist); + + if (pfn < end_pfn) { + /* Loop terminated early, cleanup. */ + release_freepages(&freelist); + return 0; } - cc->free_pfn = high_pfn; - cc->nr_freepages = nr_freepages; + /* We don't use freelists for anything. */ + return pfn; } /* Update the number of anon and file isolated pages in the zone */ @@ -243,38 +200,34 @@ static bool too_many_isolated(struct zone *zone) return isolated > (inactive + active) / 2; } -/* possible outcome of isolate_migratepages */ -typedef enum { - ISOLATE_ABORT, /* Abort compaction now */ - ISOLATE_NONE, /* No pages isolated, continue scanning */ - ISOLATE_SUCCESS, /* Pages isolated, migrate */ -} isolate_migrate_t; - -/* - * Isolate all pages that can be migrated from the block pointed to by - * the migrate scanner within compact_control. +/** + * isolate_migratepages_range() - isolate all migrate-able pages in range. + * @zone: Zone pages are in. + * @cc: Compaction control structure. + * @low_pfn: The first PFN of the range. + * @end_pfn: The one-past-the-last PFN of the range. + * + * Isolate all pages that can be migrated from the range specified by + * [low_pfn, end_pfn). Returns zero if there is a fatal signal + * pending), otherwise PFN of the first page that was not scanned + * (which may be both less, equal to or more then end_pfn). + * + * Assumes that cc->migratepages is empty and cc->nr_migratepages is + * zero. + * + * Apart from cc->migratepages and cc->nr_migratetypes this function + * does not modify any cc's fields, in particular it does not modify + * (or read for that matter) cc->migrate_pfn. */ -static isolate_migrate_t isolate_migratepages(struct zone *zone, - struct compact_control *cc) +unsigned long +isolate_migratepages_range(struct zone *zone, struct compact_control *cc, + unsigned long low_pfn, unsigned long end_pfn) { - unsigned long low_pfn, end_pfn; unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; - /* Do not scan outside zone boundaries */ - low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); - - /* Only scan within a pageblock boundary */ - end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); - - /* Do not cross the free scanner or scan within a memory hole */ - if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { - cc->migrate_pfn = end_pfn; - return ISOLATE_NONE; - } - /* * Ensure that there are not too many pages isolated from the LRU * list by either parallel reclaimers or compaction. If there are, @@ -283,12 +236,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ if (!cc->sync) - return ISOLATE_ABORT; + return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); if (fatal_signal_pending(current)) - return ISOLATE_ABORT; + return 0; } /* Time to isolate some pages for migration */ @@ -351,7 +304,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, */ pageblock_nr = low_pfn >> pageblock_order; if (!cc->sync && last_pageblock_nr != pageblock_nr && - get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { + !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; last_pageblock_nr = pageblock_nr; @@ -396,11 +349,124 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, acct_isolated(zone, cc); spin_unlock_irq(&zone->lru_lock); - cc->migrate_pfn = low_pfn; trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); - return ISOLATE_SUCCESS; + return low_pfn; +} + +#endif /* CONFIG_COMPACTION || CONFIG_CMA */ +#ifdef CONFIG_COMPACTION + +/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct page *page) +{ + + int migratetype = get_pageblock_migratetype(page); + + /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ + if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) + return false; + + /* If the page is a large free page, then allow migration */ + if (PageBuddy(page) && page_order(page) >= pageblock_order) + return true; + + /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ + if (migrate_async_suitable(migratetype)) + return true; + + /* Otherwise skip the block */ + return false; +} + +/* + * Based on information in the current compact_control, find blocks + * suitable for isolating free pages from and then isolate them. + */ +static void isolate_freepages(struct zone *zone, + struct compact_control *cc) +{ + struct page *page; + unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; + unsigned long flags; + int nr_freepages = cc->nr_freepages; + struct list_head *freelist = &cc->freepages; + + /* + * Initialise the free scanner. The starting point is where we last + * scanned from (or the end of the zone if starting). The low point + * is the end of the pageblock the migration scanner is using. + */ + pfn = cc->free_pfn; + low_pfn = cc->migrate_pfn + pageblock_nr_pages; + + /* + * Take care that if the migration scanner is at the end of the zone + * that the free scanner does not accidentally move to the next zone + * in the next isolation cycle. + */ + high_pfn = min(low_pfn, pfn); + + zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + + /* + * Isolate free pages until enough are available to migrate the + * pages on cc->migratepages. We stop searching if the migrate + * and free page scanners meet or enough free pages are isolated. + */ + for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; + pfn -= pageblock_nr_pages) { + unsigned long isolated; + + if (!pfn_valid(pfn)) + continue; + + /* + * Check for overlapping nodes/zones. It's possible on some + * configurations to have a setup like + * node0 node1 node0 + * i.e. it's possible that all pages within a zones range of + * pages do not belong to a single zone. + */ + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + continue; + + /* Check the block is suitable for migration */ + if (!suitable_migration_target(page)) + continue; + + /* + * Found a block suitable for isolating free pages from. Now + * we disabled interrupts, double check things are ok and + * isolate the pages. This is to minimise the time IRQs + * are disabled + */ + isolated = 0; + spin_lock_irqsave(&zone->lock, flags); + if (suitable_migration_target(page)) { + end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); + isolated = isolate_freepages_block(pfn, end_pfn, + freelist, false); + nr_freepages += isolated; + } + spin_unlock_irqrestore(&zone->lock, flags); + + /* + * Record the highest PFN we isolated pages from. When next + * looking for free pages, the search will restart here as + * page migration may have returned some pages to the allocator + */ + if (isolated) + high_pfn = max(high_pfn, pfn); + } + + /* split_free_page does not map the pages */ + map_pages(freelist); + + cc->free_pfn = high_pfn; + cc->nr_freepages = nr_freepages; } /* @@ -449,6 +515,44 @@ static void update_nr_listpages(struct compact_control *cc) cc->nr_freepages = nr_freepages; } +/* possible outcome of isolate_migratepages */ +typedef enum { + ISOLATE_ABORT, /* Abort compaction now */ + ISOLATE_NONE, /* No pages isolated, continue scanning */ + ISOLATE_SUCCESS, /* Pages isolated, migrate */ +} isolate_migrate_t; + +/* + * Isolate all pages that can be migrated from the block pointed to by + * the migrate scanner within compact_control. + */ +static isolate_migrate_t isolate_migratepages(struct zone *zone, + struct compact_control *cc) +{ + unsigned long low_pfn, end_pfn; + + /* Do not scan outside zone boundaries */ + low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); + + /* Only scan within a pageblock boundary */ + end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + + /* Do not cross the free scanner or scan within a memory hole */ + if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { + cc->migrate_pfn = end_pfn; + return ISOLATE_NONE; + } + + /* Perform the isolation */ + low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); + if (!low_pfn) + return ISOLATE_ABORT; + + cc->migrate_pfn = low_pfn; + + return ISOLATE_SUCCESS; +} + static int compact_finished(struct zone *zone, struct compact_control *cc) { @@ -795,3 +899,5 @@ void compaction_unregister_node(struct node *node) return device_remove_file(&node->dev, &dev_attr_compact); } #endif /* CONFIG_SYSFS && CONFIG_NUMA */ + +#endif /* CONFIG_COMPACTION */ diff --git a/mm/internal.h b/mm/internal.h index 2189af491783..aee4761cf9a9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order); extern bool is_free_buddy_page(struct page *page); #endif +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + +/* + * in mm/compaction.c + */ +/* + * compact_control is used to track pages being migrated and the free pages + * they are being migrated to during memory compaction. The free_pfn starts + * at the end of a zone and migrate_pfn begins at the start. Movable pages + * are moved to the end of a zone during a compaction run and the run + * completes when free_pfn <= migrate_pfn + */ +struct compact_control { + struct list_head freepages; /* List of free pages to migrate to */ + struct list_head migratepages; /* List of pages being migrated */ + unsigned long nr_freepages; /* Number of isolated free pages */ + unsigned long nr_migratepages; /* Number of pages to migrate */ + unsigned long free_pfn; /* isolate_freepages search base */ + unsigned long migrate_pfn; /* isolate_migratepages search base */ + bool sync; /* Synchronous migration */ + + int order; /* order a direct compactor needs */ + int migratetype; /* MOVABLE, RECLAIMABLE etc */ + struct zone *zone; +}; + +unsigned long +isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); +unsigned long +isolate_migratepages_range(struct zone *zone, struct compact_control *cc, + unsigned long low_pfn, unsigned long end_pfn); + +#endif /* * function for dealing with page's order in buddy system. diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 97cc2733551a..c99ad4e6b88c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unset_migratetype_isolate(p); + unset_migratetype_isolate(p, MIGRATE_MOVABLE); unlock_memory_hotplug(); return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6629fafd6ce4..fc898cb4fe8f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn, nr_pages = end_pfn - start_pfn; /* set above range as isolated */ - ret = start_isolate_page_range(start_pfn, end_pfn); + ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); if (ret) goto out; @@ -956,7 +956,7 @@ repeat: We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn); + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); /* removal success */ zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; @@ -981,7 +981,7 @@ failed_removal: start_pfn, end_pfn); memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ - undo_isolate_page_range(start_pfn, end_pfn); + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); out: unlock_memory_hotplug(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1851df600438..bab8e3bc4202 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -57,6 +57,7 @@ #include <linux/ftrace_event.h> #include <linux/memcontrol.h> #include <linux/prefetch.h> +#include <linux/migrate.h> #include <linux/page-debug-flags.h> #include <asm/tlbflush.h> @@ -513,10 +514,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, * free pages of length of (1 << order) and marked with _mapcount -2. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. + * other. That is, if we allocate a small block, and both were + * free, the remainder of the region must be split into blocks. * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. + * triggers coalescing into a block of larger size. * * -- wli */ @@ -749,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) __free_pages(page, order); } +#ifdef CONFIG_CMA +/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */ +void __init init_cma_reserved_pageblock(struct page *page) +{ + unsigned i = pageblock_nr_pages; + struct page *p = page; + + do { + __ClearPageReserved(p); + set_page_count(p, 0); + } while (++p, --i); + + set_page_refcounted(page); + set_pageblock_migratetype(page, MIGRATE_CMA); + __free_pages(page, pageblock_order); + totalram_pages += pageblock_nr_pages; +} +#endif /* * The order of subdivision here is critical for the IO subsystem. @@ -874,11 +893,17 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ +static int fallbacks[MIGRATE_TYPES][4] = { + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, +#ifdef CONFIG_CMA + [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ +#else + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, +#endif + [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ }; /* @@ -973,12 +998,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { - for (i = 0; i < MIGRATE_TYPES - 1; i++) { + for (i = 0;; i++) { migratetype = fallbacks[start_migratetype][i]; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) - continue; + break; area = &(zone->free_area[current_order]); if (list_empty(&area->free_list[migratetype])) @@ -993,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * pages to the preferred allocation list. If falling * back for a reclaimable kernel allocation, be more * aggressive about taking ownership of free pages + * + * On the other hand, never change migration + * type of MIGRATE_CMA pageblocks nor move CMA + * pages on different free lists. We don't + * want unmovable pages to be allocated from + * MIGRATE_CMA areas. */ - if (unlikely(current_order >= (pageblock_order >> 1)) || - start_migratetype == MIGRATE_RECLAIMABLE || - page_group_by_mobility_disabled) { - unsigned long pages; + if (!is_migrate_cma(migratetype) && + (unlikely(current_order >= pageblock_order / 2) || + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled)) { + int pages; pages = move_freepages_block(zone, page, start_migratetype); @@ -1015,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) rmv_page_order(page); /* Take ownership for orders >= pageblock_order */ - if (current_order >= pageblock_order) + if (current_order >= pageblock_order && + !is_migrate_cma(migratetype)) change_pageblock_range(page, current_order, start_migratetype); - expand(zone, page, order, current_order, area, migratetype); + expand(zone, page, order, current_order, area, + is_migrate_cma(migratetype) + ? migratetype : start_migratetype); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); @@ -1061,17 +1096,17 @@ retry_reserve: return page; } -/* +/* * Obtain a specified number of elements from the buddy allocator, all under * a single hold of the lock, for efficiency. Add them to the supplied list. * Returns the number of new pages which were placed at *list. */ -static int rmqueue_bulk(struct zone *zone, unsigned int order, +static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { - int i; - + int mt = migratetype, i; + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); @@ -1091,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, list_add(&page->lru, list); else list_add_tail(&page->lru, list); - set_page_private(page, migratetype); + if (IS_ENABLED(CONFIG_CMA)) { + mt = get_pageblock_migratetype(page); + if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) + mt = migratetype; + } + set_page_private(page, mt); list = &page->lru; } __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); @@ -1371,8 +1411,12 @@ int split_free_page(struct page *page) if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; - for (; page < endpage; page += pageblock_nr_pages) - set_pageblock_migratetype(page, MIGRATE_MOVABLE); + for (; page < endpage; page += pageblock_nr_pages) { + int mt = get_pageblock_migratetype(page); + if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) + set_pageblock_migratetype(page, + MIGRATE_MOVABLE); + } } return 1 << order; @@ -2086,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } #endif /* CONFIG_COMPACTION */ -/* The really slow allocator path where we enter direct reclaim */ -static inline struct page * -__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) +/* Perform direct synchronous page reclaim */ +static int +__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, + nodemask_t *nodemask) { - struct page *page = NULL; struct reclaim_state reclaim_state; - bool drained = false; + int progress; cond_resched(); @@ -2106,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; - *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); + progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2114,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, cond_resched(); + return progress; +} + +/* The really slow allocator path where we enter direct reclaim */ +static inline struct page * +__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, + int migratetype, unsigned long *did_some_progress) +{ + struct page *page = NULL; + bool drained = false; + + *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + nodemask); if (unlikely(!(*did_some_progress))) return NULL; @@ -4301,7 +4357,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; pgdat_page_cgroup_init(pgdat); - + for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; @@ -4976,14 +5032,7 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -/** - * setup_per_zone_wmarks - called when min_free_kbytes changes - * or when memory is hot-{added|removed} - * - * Ensures that the watermark[min,low,high] values for each zone are set - * correctly with respect to min_free_kbytes. - */ -void setup_per_zone_wmarks(void) +static void __setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; @@ -5030,6 +5079,11 @@ void setup_per_zone_wmarks(void) zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + + zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); + zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); + zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); + setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -5038,6 +5092,20 @@ void setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } +/** + * setup_per_zone_wmarks - called when min_free_kbytes changes + * or when memory is hot-{added|removed} + * + * Ensures that the watermark[min,low,high] values for each zone are set + * correctly with respect to min_free_kbytes. + */ +void setup_per_zone_wmarks(void) +{ + mutex_lock(&zonelists_mutex); + __setup_per_zone_wmarks(); + mutex_unlock(&zonelists_mutex); +} + /* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance @@ -5415,14 +5483,16 @@ static int __count_immobile_pages(struct zone *zone, struct page *page, int count) { unsigned long pfn, iter, found; + int mt; + /* * For avoiding noise data, lru_add_drain_all() should be called * If ZONE_MOVABLE, the zone never contains immobile pages */ if (zone_idx(zone) == ZONE_MOVABLE) return true; - - if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) + mt = get_pageblock_migratetype(page); + if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) return true; pfn = page_to_pfn(page); @@ -5539,7 +5609,7 @@ out: return ret; } -void unset_migratetype_isolate(struct page *page) +void unset_migratetype_isolate(struct page *page, unsigned migratetype) { struct zone *zone; unsigned long flags; @@ -5547,12 +5617,259 @@ void unset_migratetype_isolate(struct page *page) spin_lock_irqsave(&zone->lock, flags); if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) goto out; - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(zone, page, MIGRATE_MOVABLE); + set_pageblock_migratetype(page, migratetype); + move_freepages_block(zone, page, migratetype); out: spin_unlock_irqrestore(&zone->lock, flags); } +#ifdef CONFIG_CMA + +static unsigned long pfn_max_align_down(unsigned long pfn) +{ + return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) - 1); +} + +static unsigned long pfn_max_align_up(unsigned long pfn) +{ + return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages)); +} + +static struct page * +__alloc_contig_migrate_alloc(struct page *page, unsigned long private, + int **resultp) +{ + return alloc_page(GFP_HIGHUSER_MOVABLE); +} + +/* [start, end) must belong to a single zone. */ +static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) +{ + /* This function is based on compact_zone() from compaction.c. */ + + unsigned long pfn = start; + unsigned int tries = 0; + int ret = 0; + + struct compact_control cc = { + .nr_migratepages = 0, + .order = -1, + .zone = page_zone(pfn_to_page(start)), + .sync = true, + }; + INIT_LIST_HEAD(&cc.migratepages); + + migrate_prep_local(); + + while (pfn < end || !list_empty(&cc.migratepages)) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + + if (list_empty(&cc.migratepages)) { + cc.nr_migratepages = 0; + pfn = isolate_migratepages_range(cc.zone, &cc, + pfn, end); + if (!pfn) { + ret = -EINTR; + break; + } + tries = 0; + } else if (++tries == 5) { + ret = ret < 0 ? ret : -EBUSY; + break; + } + + ret = migrate_pages(&cc.migratepages, + __alloc_contig_migrate_alloc, + 0, false, MIGRATE_SYNC); + } + + putback_lru_pages(&cc.migratepages); + return ret > 0 ? 0 : ret; +} + +/* + * Update zone's cma pages counter used for watermark level calculation. + */ +static inline void __update_cma_watermarks(struct zone *zone, int count) +{ + unsigned long flags; + spin_lock_irqsave(&zone->lock, flags); + zone->min_cma_pages += count; + spin_unlock_irqrestore(&zone->lock, flags); + setup_per_zone_wmarks(); +} + +/* + * Trigger memory pressure bump to reclaim some pages in order to be able to + * allocate 'count' pages in single page units. Does similar work as + *__alloc_pages_slowpath() function. + */ +static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) +{ + enum zone_type high_zoneidx = gfp_zone(gfp_mask); + struct zonelist *zonelist = node_zonelist(0, gfp_mask); + int did_some_progress = 0; + int order = 1; + + /* + * Increase level of watermarks to force kswapd do his job + * to stabilise at new watermark level. + */ + __update_cma_watermarks(zone, count); + + /* Obey watermarks as if the page was being allocated */ + while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { + wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); + + did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + NULL); + if (!did_some_progress) { + /* Exhausted what can be done so it's blamo time */ + out_of_memory(zonelist, gfp_mask, order, NULL, false); + } + } + + /* Restore original watermark levels. */ + __update_cma_watermarks(zone, -count); + + return count; +} + +/** + * alloc_contig_range() -- tries to allocate given range of pages + * @start: start PFN to allocate + * @end: one-past-the-last PFN to allocate + * @migratetype: migratetype of the underlaying pageblocks (either + * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks + * in range must have the same migratetype and it must + * be either of the two. + * + * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES + * aligned, however it's the caller's responsibility to guarantee that + * we are the only thread that changes migrate type of pageblocks the + * pages fall in. + * + * The PFN range must belong to a single zone. + * + * Returns zero on success or negative error code. On success all + * pages which PFN is in [start, end) are allocated for the caller and + * need to be freed with free_contig_range(). + */ +int alloc_contig_range(unsigned long start, unsigned long end, + unsigned migratetype) +{ + struct zone *zone = page_zone(pfn_to_page(start)); + unsigned long outer_start, outer_end; + int ret = 0, order; + + /* + * What we do here is we mark all pageblocks in range as + * MIGRATE_ISOLATE. Because pageblock and max order pages may + * have different sizes, and due to the way page allocator + * work, we align the range to biggest of the two pages so + * that page allocator won't try to merge buddies from + * different pageblocks and change MIGRATE_ISOLATE to some + * other migration type. + * + * Once the pageblocks are marked as MIGRATE_ISOLATE, we + * migrate the pages from an unaligned range (ie. pages that + * we are interested in). This will put all the pages in + * range back to page allocator as MIGRATE_ISOLATE. + * + * When this is done, we take the pages in range from page + * allocator removing them from the buddy system. This way + * page allocator will never consider using them. + * + * This lets us mark the pageblocks back as + * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the + * aligned range but not in the unaligned, original range are + * put back to page allocator so that buddy can use them. + */ + + ret = start_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end), migratetype); + if (ret) + goto done; + + ret = __alloc_contig_migrate_range(start, end); + if (ret) + goto done; + + /* + * Pages from [start, end) are within a MAX_ORDER_NR_PAGES + * aligned blocks that are marked as MIGRATE_ISOLATE. What's + * more, all pages in [start, end) are free in page allocator. + * What we are going to do is to allocate all pages from + * [start, end) (that is remove them from page allocator). + * + * The only problem is that pages at the beginning and at the + * end of interesting range may be not aligned with pages that + * page allocator holds, ie. they can be part of higher order + * pages. Because of this, we reserve the bigger range and + * once this is done free the pages we are not interested in. + * + * We don't have to hold zone->lock here because the pages are + * isolated thus they won't get removed from buddy. + */ + + lru_add_drain_all(); + drain_all_pages(); + + order = 0; + outer_start = start; + while (!PageBuddy(pfn_to_page(outer_start))) { + if (++order >= MAX_ORDER) { + ret = -EBUSY; + goto done; + } + outer_start &= ~0UL << order; + } + + /* Make sure the range is really isolated. */ + if (test_pages_isolated(outer_start, end)) { + pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", + outer_start, end); + ret = -EBUSY; + goto done; + } + + /* + * Reclaim enough pages to make sure that contiguous allocation + * will not starve the system. + */ + __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); + + /* Grab isolated pages from freelists. */ + outer_end = isolate_freepages_range(outer_start, end); + if (!outer_end) { + ret = -EBUSY; + goto done; + } + + /* Free head and tail (if any) */ + if (start != outer_start) + free_contig_range(outer_start, start - outer_start); + if (end != outer_end) + free_contig_range(end, outer_end - end); + +done: + undo_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end), migratetype); + return ret; +} + +void free_contig_range(unsigned long pfn, unsigned nr_pages) +{ + for (; nr_pages--; ++pfn) + __free_page(pfn_to_page(pfn)); +} +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE /* * All pages in the range must be isolated before calling this. diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4ae42bb40892..c9f04774f2b8 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * to be MIGRATE_ISOLATE. * @start_pfn: The lower PFN of the range to be isolated. * @end_pfn: The upper PFN of the range to be isolated. + * @migratetype: migrate type to set in error recovery. * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the @@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * start_pfn/end_pfn must be aligned to pageblock_order. * Returns 0 on success and -EBUSY if any part of range cannot be isolated. */ -int -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) +int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype) { unsigned long pfn; unsigned long undo_pfn; @@ -56,7 +57,7 @@ undo: for (pfn = start_pfn; pfn < undo_pfn; pfn += pageblock_nr_pages) - unset_migratetype_isolate(pfn_to_page(pfn)); + unset_migratetype_isolate(pfn_to_page(pfn), migratetype); return -EBUSY; } @@ -64,8 +65,8 @@ undo: /* * Make isolated pages available again. */ -int -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) +int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype) { unsigned long pfn; struct page *page; @@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) page = __first_valid_page(pfn, pageblock_nr_pages); if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) continue; - unset_migratetype_isolate(page); + unset_migratetype_isolate(page, migratetype); } return 0; } @@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) * all pages in [start_pfn...end_pfn) must be in the same zone. * zone->lock must be held before call this. * - * Returns 1 if all pages in the range is isolated. + * Returns 1 if all pages in the range are isolated. */ static int __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) diff --git a/mm/vmstat.c b/mm/vmstat.c index 7db1b9bab492..0dad31dc1618 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Reclaimable", "Movable", "Reserve", +#ifdef CONFIG_CMA + "CMA", +#endif "Isolate", }; |