diff options
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- | mm/memory_hotplug.c | 211 |
1 files changed, 86 insertions, 125 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 8e9e2d44cdad..6f203574ca1d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -105,7 +105,7 @@ static struct resource *register_memory_resource(u64 start, u64 size, unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; if (strcmp(resource_name, "System RAM")) - flags |= IORESOURCE_MEM_DRIVER_MANAGED; + flags |= IORESOURCE_SYSRAM_DRIVER_MANAGED; /* * Make sure value parsed from 'mem=' only restricts memory adding @@ -625,31 +625,22 @@ void generic_online_page(struct page *page, unsigned int order) } EXPORT_SYMBOL_GPL(generic_online_page); -static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, - void *arg) +static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages) { const unsigned long end_pfn = start_pfn + nr_pages; unsigned long pfn; - int order; /* - * Online the pages. The callback might decide to keep some pages - * PG_reserved (to add them to the buddy later), but we still account - * them as being online/belonging to this zone ("present"). + * Online the pages in MAX_ORDER - 1 aligned chunks. The callback might + * decide to not expose all pages to the buddy (e.g., expose them + * later). We account all pages as being online and belonging to this + * zone ("present"). */ - for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) { - order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn))); - /* __free_pages_core() wants pfns to be aligned to the order */ - if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order))) - order = 0; - (*online_page_callback)(pfn_to_page(pfn), order); - } + for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) + (*online_page_callback)(pfn_to_page(pfn), MAX_ORDER - 1); /* mark all involved sections as online */ online_mem_sections(start_pfn, end_pfn); - - *(unsigned long *)arg += nr_pages; - return 0; } /* check which state of node_states will be changed when online memory */ @@ -710,9 +701,14 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon * Associate the pfn range with the given zone, initializing the memmaps * and resizing the pgdat/zone data to span the added pages. After this * call, all affected pages are PG_reserved. + * + * All aligned pageblocks are initialized to the specified migratetype + * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related + * zone stats (e.g., nr_isolate_pageblock) are touched. */ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) + unsigned long nr_pages, + struct vmem_altmap *altmap, int migratetype) { struct pglist_data *pgdat = zone->zone_pgdat; int nid = pgdat->node_id; @@ -737,7 +733,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * are reserved so nobody should be touching them so we should be safe */ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, - MEMINIT_HOTPLUG, altmap); + MEMINIT_HOTPLUG, altmap, migratetype); set_zone_contiguous(zone); } @@ -803,17 +799,21 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type, int nid) { unsigned long flags; - unsigned long onlined_pages = 0; struct zone *zone; int need_zonelists_rebuild = 0; int ret; struct memory_notify arg; + /* We can only online full sections (e.g., SECTION_IS_ONLINE) */ + if (WARN_ON_ONCE(!nr_pages || + !IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION))) + return -EINVAL; + mem_hotplug_begin(); /* associate pfn range with the zone */ zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages); - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL); + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE); arg.start_pfn = pfn; arg.nr_pages = nr_pages; @@ -825,6 +825,14 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, goto failed_addition; /* + * Fixup the number of isolated pageblocks before marking the sections + * onlining, such that undo_isolate_page_range() works correctly. + */ + spin_lock_irqsave(&zone->lock, flags); + zone->nr_isolate_pageblock += nr_pages / pageblock_nr_pages; + spin_unlock_irqrestore(&zone->lock, flags); + + /* * If this zone is not populated, then it is not in zonelist. * This means the page allocator ignores this zone. * So, zonelist must be updated after online. @@ -834,36 +842,29 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, setup_zone_pageset(zone); } - ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages, - online_pages_range); - if (ret) { - /* not a single memory resource was applicable */ - if (need_zonelists_rebuild) - zone_pcp_reset(zone); - goto failed_addition; - } - - zone->present_pages += onlined_pages; + online_pages_range(pfn, nr_pages); + zone->present_pages += nr_pages; pgdat_resize_lock(zone->zone_pgdat, &flags); - zone->zone_pgdat->node_present_pages += onlined_pages; + zone->zone_pgdat->node_present_pages += nr_pages; pgdat_resize_unlock(zone->zone_pgdat, &flags); - /* - * When exposing larger, physically contiguous memory areas to the - * buddy, shuffling in the buddy (when freeing onlined pages, putting - * them either to the head or the tail of the freelist) is only helpful - * for maintaining the shuffle, but not for creating the initial - * shuffle. Shuffle the whole zone to make sure the just onlined pages - * are properly distributed across the whole freelist. - */ - shuffle_zone(zone); - node_states_set_node(nid, &arg); if (need_zonelists_rebuild) build_all_zonelists(NULL); zone_pcp_update(zone); + /* Basic onlining is complete, allow allocation of onlined pages. */ + undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE); + + /* + * Freshly onlined pages aren't shuffled (e.g., all pages are placed to + * the tail of the freelist when undoing isolation). Shuffle the whole + * zone to make sure the just onlined pages are properly distributed + * across the whole freelist - to create an initial shuffle. + */ + shuffle_zone(zone); + init_per_zone_wmark_min(); kswapd_run(nid); @@ -1035,7 +1036,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) * * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -int __ref add_memory_resource(int nid, struct resource *res) +int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { struct mhp_params params = { .pgprot = PAGE_KERNEL }; u64 start, size; @@ -1088,9 +1089,8 @@ int __ref add_memory_resource(int nid, struct resource *res) } /* link memory sections under this node.*/ - ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1), - MEMINIT_HOTPLUG); - BUG_ON(ret); + link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1), + MEMINIT_HOTPLUG); /* create new memmap entry */ if (!strcmp(res->name, "System RAM")) @@ -1099,6 +1099,13 @@ int __ref add_memory_resource(int nid, struct resource *res) /* device_online() will take the lock when calling online_pages() */ mem_hotplug_done(); + /* + * In case we're allowed to merge the resource, flag it and trigger + * merging now that adding succeeded. + */ + if (mhp_flags & MEMHP_MERGE_RESOURCE) + merge_system_ram_resource(res); + /* online pages if requested */ if (memhp_default_online_type != MMOP_OFFLINE) walk_memory_blocks(start, size, NULL, online_memory_block); @@ -1115,7 +1122,7 @@ error: } /* requires device_hotplug_lock, see add_memory_resource() */ -int __ref __add_memory(int nid, u64 start, u64 size) +int __ref __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags) { struct resource *res; int ret; @@ -1124,18 +1131,18 @@ int __ref __add_memory(int nid, u64 start, u64 size) if (IS_ERR(res)) return PTR_ERR(res); - ret = add_memory_resource(nid, res); + ret = add_memory_resource(nid, res, mhp_flags); if (ret < 0) release_memory_resource(res); return ret; } -int add_memory(int nid, u64 start, u64 size) +int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags) { int rc; lock_device_hotplug(); - rc = __add_memory(nid, start, size); + rc = __add_memory(nid, start, size, mhp_flags); unlock_device_hotplug(); return rc; @@ -1157,14 +1164,14 @@ EXPORT_SYMBOL_GPL(add_memory); * * For this memory, no entries in /sys/firmware/memmap ("raw firmware-provided * memory map") are created. Also, the created memory resource is flagged - * with IORESOURCE_MEM_DRIVER_MANAGED, so in-kernel users can special-case + * with IORESOURCE_SYSRAM_DRIVER_MANAGED, so in-kernel users can special-case * this memory as well (esp., not place kexec images onto it). * * The resource_name (visible via /proc/iomem) has to have the format * "System RAM ($DRIVER)". */ int add_memory_driver_managed(int nid, u64 start, u64 size, - const char *resource_name) + const char *resource_name, mhp_t mhp_flags) { struct resource *res; int rc; @@ -1182,7 +1189,7 @@ int add_memory_driver_managed(int nid, u64 start, u64 size, goto out_unlock; } - rc = add_memory_resource(nid, res); + rc = add_memory_resource(nid, res, mhp_flags); if (rc < 0) release_memory_resource(res); @@ -1379,28 +1386,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) return ret; } -/* Mark all sections offline and remove all free pages from the buddy. */ -static int -offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages, - void *data) -{ - unsigned long *offlined_pages = (unsigned long *)data; - - *offlined_pages += __offline_isolated_pages(start, start + nr_pages); - return 0; -} - -/* - * Check all pages in range, recorded as memory resource, are isolated. - */ -static int -check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages, - void *data) -{ - return test_pages_isolated(start_pfn, start_pfn + nr_pages, - MEMORY_OFFLINE); -} - static int __init cmdline_parse_movable_node(char *p) { movable_node_enabled = true; @@ -1484,17 +1469,21 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, return 0; } -static int __ref __offline_pages(unsigned long start_pfn, - unsigned long end_pfn) +int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) { - unsigned long pfn, nr_pages = 0; - unsigned long offlined_pages = 0; - int ret, node, nr_isolate_pageblock; + const unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn, system_ram_pages = 0; unsigned long flags; struct zone *zone; struct memory_notify arg; + int ret, node; char *reason; + /* We can only offline full sections (e.g., SECTION_IS_ONLINE) */ + if (WARN_ON_ONCE(!nr_pages || + !IS_ALIGNED(start_pfn | nr_pages, PAGES_PER_SECTION))) + return -EINVAL; + mem_hotplug_begin(); /* @@ -1505,9 +1494,9 @@ static int __ref __offline_pages(unsigned long start_pfn, * memory holes PG_reserved, don't need pfn_valid() checks, and can * avoid using walk_system_ram_range() later. */ - walk_system_ram_range(start_pfn, end_pfn - start_pfn, &nr_pages, + walk_system_ram_range(start_pfn, nr_pages, &system_ram_pages, count_system_ram_pages_cb); - if (nr_pages != end_pfn - start_pfn) { + if (system_ram_pages != nr_pages) { ret = -EINVAL; reason = "memory holes"; goto failed_removal; @@ -1527,11 +1516,10 @@ static int __ref __offline_pages(unsigned long start_pfn, ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE, MEMORY_OFFLINE | REPORT_FAILURE); - if (ret < 0) { + if (ret) { reason = "failure to isolate range"; goto failed_removal; } - nr_isolate_pageblock = ret; arg.start_pfn = start_pfn; arg.nr_pages = nr_pages; @@ -1581,9 +1569,7 @@ static int __ref __offline_pages(unsigned long start_pfn, reason = "failure to dissolve huge pages"; goto failed_removal_isolated; } - /* check again */ - ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, - NULL, check_pages_isolated_cb); + /* * per-cpu pages are drained in start_isolate_page_range, but if * there are still pages that are not free, make sure that we @@ -1596,30 +1582,30 @@ static int __ref __offline_pages(unsigned long start_pfn, * because has_unmovable_pages explicitly checks for * PageBuddy on freed pages on other zones. */ + ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE); if (ret) drain_all_pages(zone); } while (ret); - /* Ok, all of our target is isolated. - We cannot do rollback at this point. */ - walk_system_ram_range(start_pfn, end_pfn - start_pfn, - &offlined_pages, offline_isolated_pages_cb); - pr_info("Offlined Pages %ld\n", offlined_pages); + /* Mark all sections offline and remove free pages from the buddy. */ + __offline_isolated_pages(start_pfn, end_pfn); + pr_info("Offlined Pages %ld\n", nr_pages); + /* - * Onlining will reset pagetype flags and makes migrate type - * MOVABLE, so just need to decrease the number of isolated - * pageblocks zone counter here. + * The memory sections are marked offline, and the pageblock flags + * effectively stale; nobody should be touching them. Fixup the number + * of isolated pageblocks, memory onlining will properly revert this. */ spin_lock_irqsave(&zone->lock, flags); - zone->nr_isolate_pageblock -= nr_isolate_pageblock; + zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages; spin_unlock_irqrestore(&zone->lock, flags); /* removal success */ - adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); - zone->present_pages -= offlined_pages; + adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); + zone->present_pages -= nr_pages; pgdat_resize_lock(zone->zone_pgdat, &flags); - zone->zone_pgdat->node_present_pages -= offlined_pages; + zone->zone_pgdat->node_present_pages -= nr_pages; pgdat_resize_unlock(zone->zone_pgdat, &flags); init_per_zone_wmark_min(); @@ -1656,11 +1642,6 @@ failed_removal: return ret; } -int offline_pages(unsigned long start_pfn, unsigned long nr_pages) -{ - return __offline_pages(start_pfn, start_pfn + nr_pages); -} - static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); @@ -1749,26 +1730,6 @@ void try_offline_node(int nid) } EXPORT_SYMBOL(try_offline_node); -static void __release_memory_resource(resource_size_t start, - resource_size_t size) -{ - int ret; - - /* - * When removing memory in the same granularity as it was added, - * this function never fails. It might only fail if resources - * have to be adjusted or split. We'll ignore the error, as - * removing of memory cannot fail. - */ - ret = release_mem_region_adjustable(&iomem_resource, start, size); - if (ret) { - resource_size_t endres = start + size - 1; - - pr_warn("Unable to release resource <%pa-%pa> (%d)\n", - &start, &endres, ret); - } -} - static int __ref try_remove_memory(int nid, u64 start, u64 size) { int rc = 0; @@ -1802,7 +1763,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) memblock_remove(start, size); } - __release_memory_resource(start, size); + release_mem_region_adjustable(start, size); try_offline_node(nid); |