From 859a85ddf90e714092dea71a0e54c7b9896621be Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 7 Sep 2021 19:54:52 -0700 Subject: mm: remove pfn_valid_within() and CONFIG_HOLES_IN_ZONE Patch series "mm: remove pfn_valid_within() and CONFIG_HOLES_IN_ZONE". After recent updates to freeing unused parts of the memory map, no architecture can have holes in the memory map within a pageblock. This makes pfn_valid_within() check and CONFIG_HOLES_IN_ZONE configuration option redundant. The first patch removes them both in a mechanical way and the second patch simplifies memory_hotplug::test_pages_in_a_zone() that had pfn_valid_within() surrounded by more logic than simple if. This patch (of 2): After recent changes in freeing of the unused parts of the memory map and rework of pfn_valid() in arm and arm64 there are no architectures that can have holes in the memory map within a pageblock and so nothing can enable CONFIG_HOLES_IN_ZONE which guards non trivial implementation of pfn_valid_within(). With that, pfn_valid_within() is always hardwired to 1 and can be completely removed. Remove calls to pfn_valid_within() and CONFIG_HOLES_IN_ZONE. Link: https://lkml.kernel.org/r/20210713080035.7464-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210713080035.7464-2-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/base/node.c b/drivers/base/node.c index 4a4ae868ad9f..8ec6b7dfbb0f 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -768,8 +768,6 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static int __ref get_nid_for_pfn(unsigned long pfn) { - if (!pfn_valid_within(pfn)) - return -1; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT if (system_state < SYSTEM_RUNNING) return early_pfn_to_nid(pfn); -- cgit v1.2.3 From e1c158e4956612e7bada4c03dfb99210af4d6cde Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:09 -0700 Subject: mm/memory_hotplug: remove nid parameter from remove_memory() and friends There is only a single user remaining. We can simply lookup the nid only used for node offlining purposes when walking our memory blocks. We don't expect to remove multi-nid ranges; and if we'd ever do, we most probably don't care about removing multi-nid ranges that actually result in empty nodes. If ever required, we can detect the "multi-nid" scenario and simply try offlining all online nodes. Link: https://lkml.kernel.org/r/20210712124052.26491-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michael Ellerman (powerpc) Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Nathan Lynch Cc: Laurent Dufour Cc: "Aneesh Kumar K.V" Cc: Scott Cheloha Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Michal Hocko Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. Wysocki" Cc: Rich Felker Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 9 ++++---- drivers/acpi/acpi_memhotplug.c | 7 +------ drivers/dax/kmem.c | 3 +-- drivers/virtio/virtio_mem.c | 4 ++-- include/linux/memory_hotplug.h | 10 ++++----- mm/memory_hotplug.c | 28 +++++++++++++++---------- 6 files changed, 30 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..ef5c24b42cf1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -286,7 +286,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si { unsigned long block_sz, start_pfn; int sections_per_block; - int i, nid; + int i; start_pfn = base >> PAGE_SHIFT; @@ -297,10 +297,9 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si block_sz = pseries_memory_block_size(); sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; - nid = memory_add_physaddr_to_nid(base); for (i = 0; i < sections_per_block; i++) { - __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + __remove_memory(base, MIN_MEMORY_BLOCK_SIZE); base += MIN_MEMORY_BLOCK_SIZE; } @@ -387,7 +386,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) block_sz = pseries_memory_block_size(); - __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); put_device(&mem_block->dev); /* Update memory regions for memory remove */ @@ -660,7 +659,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 8cc195c4c861..1d01d9414c40 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -239,19 +239,14 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { - acpi_handle handle = mem_device->device->handle; struct acpi_memory_info *info, *n; - int nid = acpi_get_node(handle); list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (!info->enabled) continue; - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(info->start_addr); - acpi_unbind_memory_blocks(info); - __remove_memory(nid, info->start_addr, info->length); + __remove_memory(info->start_addr, info->length); list_del(&info->list); kfree(info); } diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index ac231cc36359..99e0f60c4c26 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -156,8 +156,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) if (rc) continue; - rc = remove_memory(dev_dax->target_node, range.start, - range_len(&range)); + rc = remove_memory(range.start, range_len(&range)); if (rc == 0) { release_resource(data->res[i]); kfree(data->res[i]); diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index b91bc810a87e..7e83ed373e00 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -677,7 +677,7 @@ static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, addr + size - 1); - rc = remove_memory(vm->nid, addr, size); + rc = remove_memory(addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); /* @@ -720,7 +720,7 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, "offlining and removing memory: 0x%llx - 0x%llx\n", addr, addr + size - 1); - rc = offline_and_remove_memory(vm->nid, addr, size); + rc = offline_and_remove_memory(addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); /* diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 010a192298b5..068e3dcf4690 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -292,9 +292,9 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {} extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); -extern int remove_memory(int nid, u64 start, u64 size); -extern void __remove_memory(int nid, u64 start, u64 size); -extern int offline_and_remove_memory(int nid, u64 start, u64 size); +extern int remove_memory(u64 start, u64 size); +extern void __remove_memory(u64 start, u64 size); +extern int offline_and_remove_memory(u64 start, u64 size); #else static inline void try_offline_node(int nid) {} @@ -304,12 +304,12 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) return -EINVAL; } -static inline int remove_memory(int nid, u64 start, u64 size) +static inline int remove_memory(u64 start, u64 size) { return -EBUSY; } -static inline void __remove_memory(int nid, u64 start, u64 size) {} +static inline void __remove_memory(u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern void set_zone_contiguous(struct zone *zone); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 14c4f6051c13..6ea62efe2a8f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1739,7 +1739,9 @@ failed_removal: static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); + int *nid = arg; + *nid = mem->nid; if (unlikely(ret)) { phys_addr_t beginpa, endpa; @@ -1832,12 +1834,12 @@ void try_offline_node(int nid) } EXPORT_SYMBOL(try_offline_node); -static int __ref try_remove_memory(int nid, u64 start, u64 size) +static int __ref try_remove_memory(u64 start, u64 size) { - int rc = 0; struct vmem_altmap mhp_altmap = {}; struct vmem_altmap *altmap = NULL; unsigned long nr_vmemmap_pages; + int rc = 0, nid = NUMA_NO_NODE; BUG_ON(check_hotplug_memory_range(start, size)); @@ -1845,8 +1847,12 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * All memory blocks must be offlined before removing memory. Check * whether all memory blocks in question are offline and return error * if this is not the case. + * + * While at it, determine the nid. Note that if we'd have mixed nodes, + * we'd only try to offline the last determined one -- which is good + * enough for the cases we care about. */ - rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); + rc = walk_memory_blocks(start, size, &nid, check_memblock_offlined_cb); if (rc) return rc; @@ -1895,7 +1901,8 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) release_mem_region_adjustable(start, size); - try_offline_node(nid); + if (nid != NUMA_NO_NODE) + try_offline_node(nid); mem_hotplug_done(); return 0; @@ -1903,7 +1910,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) /** * __remove_memory - Remove memory if every memory block is offline - * @nid: the node ID * @start: physical address of the region to remove * @size: size of the region to remove * @@ -1911,14 +1917,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * and online/offline operations before this call, as required by * try_offline_node(). */ -void __remove_memory(int nid, u64 start, u64 size) +void __remove_memory(u64 start, u64 size) { /* * trigger BUG() if some memory is not offlined prior to calling this * function */ - if (try_remove_memory(nid, start, size)) + if (try_remove_memory(start, size)) BUG(); } @@ -1926,12 +1932,12 @@ void __remove_memory(int nid, u64 start, u64 size) * Remove memory if every memory block is offline, otherwise return -EBUSY is * some memory is not offline */ -int remove_memory(int nid, u64 start, u64 size) +int remove_memory(u64 start, u64 size) { int rc; lock_device_hotplug(); - rc = try_remove_memory(nid, start, size); + rc = try_remove_memory(start, size); unlock_device_hotplug(); return rc; @@ -1991,7 +1997,7 @@ static int try_reonline_memory_block(struct memory_block *mem, void *arg) * unplugged all memory (so it's no longer in use) and want to offline + remove * that memory. */ -int offline_and_remove_memory(int nid, u64 start, u64 size) +int offline_and_remove_memory(u64 start, u64 size) { const unsigned long mb_count = size / memory_block_size_bytes(); uint8_t *online_types, *tmp; @@ -2027,7 +2033,7 @@ int offline_and_remove_memory(int nid, u64 start, u64 size) * This cannot fail as it cannot get onlined in the meantime. */ if (!rc) { - rc = try_remove_memory(nid, start, size); + rc = try_remove_memory(start, size); if (rc) pr_err("%s: Failed to remove memory: %d", __func__, rc); } -- cgit v1.2.3 From 35ba0cd5290b2f1f92e1f0eb6de7ce7979f79728 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:14 -0700 Subject: ACPI: memhotplug: memory resources cannot be enabled yet We allocate + initialize everything from scratch. In case enabling the device fails, we free all memory resourcs. Link: https://lkml.kernel.org/r/20210712124052.26491-5-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Rafael J. Wysocki Reviewed-by: Oscar Salvador Reviewed-by: Pankaj Gupta Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dan Williams Cc: Dave Hansen Cc: Dave Jiang Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Laurent Dufour Cc: Len Brown Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nathan Lynch Cc: Nicholas Piggin Cc: Pankaj Gupta Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. Wysocki" Cc: Rich Felker Cc: Scott Cheloha Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 1d01d9414c40..eb4faf7c5cad 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -182,10 +182,6 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) * (i.e. memory-hot-remove function) */ list_for_each_entry(info, &mem_device->res_list, list) { - if (info->enabled) { /* just sanity check...*/ - num_enabled++; - continue; - } /* * If the memory block size is zero, please ignore it. * Don't try to do the following memory hotplug flowchart. -- cgit v1.2.3 From 4b0970024408afb17886e0c76e9761c4264db2a8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:19 -0700 Subject: mm: track present early pages per zone Patch series "mm/memory_hotplug: "auto-movable" online policy and memory groups", v3. I. Goal The goal of this series is improving in-kernel auto-online support. It tackles the fundamental problems that: 1) We can create zone imbalances when onlining all memory blindly to ZONE_MOVABLE, in the worst case crashing the system. We have to know upfront how much memory we are going to hotplug such that we can safely enable auto-onlining of all hotplugged memory to ZONE_MOVABLE via "online_movable". This is far from practical and only applicable in limited setups -- like inside VMs under the RHV/oVirt hypervisor which will never hotplug more than 3 times the boot memory (and the limitation is only in place due to the Linux limitation). 2) We see more setups that implement dynamic VM resizing, hot(un)plugging memory to resize VM memory. In these setups, we might hotplug a lot of memory, but it might happen in various small steps in both directions (e.g., 2 GiB -> 8 GiB -> 4 GiB -> 16 GiB ...). virtio-mem is the primary driver of this upstream right now, performing such dynamic resizing NUMA-aware via multiple virtio-mem devices. Onlining all hotplugged memory to ZONE_NORMAL means we basically have no hotunplug guarantees. Onlining all to ZONE_MOVABLE means we can easily run into zone imbalances when growing a VM. We want a mixture, and we want as much memory as reasonable/configured in ZONE_MOVABLE. Details regarding zone imbalances can be found at [1]. 3) Memory devices consist of 1..X memory block devices, however, the kernel doesn't really track the relationship. Consequently, also user space has no idea. We want to make per-device decisions. As one example, for memory hotunplug it doesn't make sense to use a mixture of zones within a single DIMM: we want all MOVABLE if possible, otherwise all !MOVABLE, because any !MOVABLE part will easily block the whole DIMM from getting hotunplugged. As another example, virtio-mem operates on individual units that span 1..X memory blocks. Similar to a DIMM, we want a unit to either be all MOVABLE or !MOVABLE. A "unit" can be thought of like a DIMM, however, all units of a virtio-mem device logically belong together and are managed (added/removed) by a single driver. We want as much memory of a virtio-mem device to be MOVABLE as possible. 4) We want memory onlining to be done right from the kernel while adding memory, not triggered by user space via udev rules; for example, this is reqired for fast memory hotplug for drivers that add individual memory blocks, like virito-mem. We want a way to configure a policy in the kernel and avoid implementing advanced policies in user space. The auto-onlining support we have in the kernel is not sufficient. All we have is a) online everything MOVABLE (online_movable) b) online everything !MOVABLE (online_kernel) c) keep zones contiguous (online). This series allows configuring c) to mean instead "online movable if possible according to the coniguration, driven by a maximum MOVABLE:KERNEL ratio" -- a new onlining policy. II. Approach This series does 3 things: 1) Introduces the "auto-movable" online policy that initially operates on individual memory blocks only. It uses a maximum MOVABLE:KERNEL ratio to make a decision whether a memory block will be onlined to ZONE_MOVABLE or not. However, in the basic form, hotplugged KERNEL memory does not allow for more MOVABLE memory (details in the patches). CMA memory is treated like MOVABLE memory. 2) Introduces static (e.g., DIMM) and dynamic (e.g., virtio-mem) memory groups and uses group information to make decisions in the "auto-movable" online policy across memory blocks of a single memory device (modeled as memory group). More details can be found in patch #3 or in the DIMM example below. 3) Maximizes ZONE_MOVABLE memory within dynamic memory groups, by allowing ZONE_NORMAL memory within a dynamic memory group to allow for more ZONE_MOVABLE memory within the same memory group. The target use case is dynamic VM resizing using virtio-mem. See the virtio-mem example below. I remember that the basic idea of using a ratio to implement a policy in the kernel was once mentioned by Vitaly Kuznetsov, but I might be wrong (I lost the pointer to that discussion). For me, the main use case is using it along with virtio-mem (and DIMMs / ppc64 dlpar where necessary) for dynamic resizing of VMs, increasing the amount of memory we can hotunplug reliably again if we might eventually hotplug a lot of memory to a VM. III. Target Usage The target usage will be: 1) Linux boots with "mhp_default_online_type=offline" 2) User space (e.g., systemd unit) configures memory onlining (according to a config file and system properties), for example: * Setting memory_hotplug.online_policy=auto-movable * Setting memory_hotplug.auto_movable_ratio=301 * Setting memory_hotplug.auto_movable_numa_aware=true 3) User space enabled auto onlining via "echo online > /sys/devices/system/memory/auto_online_blocks" 4) User space triggers manual onlining of all already-offline memory blocks (go over offline memory blocks and set them to "online") IV. Example For DIMMs, hotplugging 4 GiB DIMMs to a 4 GiB VM with a configured ratio of 301% results in the following layout: Memory block 0-15: DMA32 (early) Memory block 32-47: Normal (early) Memory block 48-79: Movable (DIMM 0) Memory block 80-111: Movable (DIMM 1) Memory block 112-143: Movable (DIMM 2) Memory block 144-275: Normal (DIMM 3) Memory block 176-207: Normal (DIMM 4) ... all Normal (-> hotplugged Normal memory does not allow for more Movable memory) For virtio-mem, using a simple, single virtio-mem device with a 4 GiB VM will result in the following layout: Memory block 0-15: DMA32 (early) Memory block 32-47: Normal (early) Memory block 48-143: Movable (virtio-mem, first 12 GiB) Memory block 144: Normal (virtio-mem, next 128 MiB) Memory block 145-147: Movable (virtio-mem, next 384 MiB) Memory block 148: Normal (virtio-mem, next 128 MiB) Memory block 149-151: Movable (virtio-mem, next 384 MiB) ... Normal/Movable mixture as above (-> hotplugged Normal memory allows for more Movable memory within the same device) Which gives us maximum flexibility when dynamically growing/shrinking a VM in smaller steps. V. Doc Update I'll update the memory-hotplug.rst documentation, once the overhaul [1] is usptream. Until then, details can be found in patch #2. VI. Future Work 1) Use memory groups for ppc64 dlpar 2) Being able to specify a portion of (early) kernel memory that will be excluded from the ratio. Like "128 MiB globally/per node" are excluded. This might be helpful when starting VMs with extremely small memory footprint (e.g., 128 MiB) and hotplugging memory later -- not wanting the first hotplugged units getting onlined to ZONE_MOVABLE. One alternative would be a trigger to not consider ZONE_DMA memory in the ratio. We'll have to see if this is really rrequired. 3) Indicate to user space that MOVABLE might be a bad idea -- especially relevant when memory ballooning without support for balloon compaction is active. This patch (of 9): For implementing a new memory onlining policy, which determines when to online memory blocks to ZONE_MOVABLE semi-automatically, we need the number of present early (boot) pages -- present pages excluding hotplugged pages. Let's track these pages per zone. Pass a page instead of the zone to adjust_present_page_count(), similar as adjust_managed_page_count() and derive the zone from the page. It's worth noting that a memory block to be offlined/onlined is either completely "early" or "not early". add_memory() and friends can only add complete memory blocks and we only online/offline complete (individual) memory blocks. Link: https://lkml.kernel.org/r/20210806124715.17090-1-david@redhat.com Link: https://lkml.kernel.org/r/20210806124715.17090-2-david@redhat.com Signed-off-by: David Hildenbrand Cc: Vitaly Kuznetsov Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Marek Kedzierski Cc: Hui Zhu Cc: Pankaj Gupta Cc: Wei Yang Cc: Oscar Salvador Cc: Michal Hocko Cc: Dan Williams Cc: Anshuman Khandual Cc: Dave Hansen Cc: Vlastimil Babka Cc: Mike Rapoport Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Pavel Tatashin Cc: Greg Kroah-Hartman Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 14 +++++++------- include/linux/memory_hotplug.h | 2 +- include/linux/mmzone.h | 7 +++++++ mm/memory_hotplug.c | 14 +++++++++++--- mm/page_alloc.c | 3 +++ 5 files changed, 29 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index aa31a21f33d7..86ec2dc82fc2 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -205,7 +205,8 @@ static int memory_block_online(struct memory_block *mem) * now already properly populated. */ if (nr_vmemmap_pages) - adjust_present_page_count(zone, nr_vmemmap_pages); + adjust_present_page_count(pfn_to_page(start_pfn), + nr_vmemmap_pages); return ret; } @@ -215,24 +216,23 @@ static int memory_block_offline(struct memory_block *mem) unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; - struct zone *zone; int ret; /* * Unaccount before offlining, such that unpopulated zone and kthreads * can properly be torn down in offline_pages(). */ - if (nr_vmemmap_pages) { - zone = page_zone(pfn_to_page(start_pfn)); - adjust_present_page_count(zone, -nr_vmemmap_pages); - } + if (nr_vmemmap_pages) + adjust_present_page_count(pfn_to_page(start_pfn), + -nr_vmemmap_pages); ret = offline_pages(start_pfn + nr_vmemmap_pages, nr_pages - nr_vmemmap_pages); if (ret) { /* offline_pages() failed. Account back. */ if (nr_vmemmap_pages) - adjust_present_page_count(zone, nr_vmemmap_pages); + adjust_present_page_count(pfn_to_page(start_pfn), + nr_vmemmap_pages); return ret; } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 068e3dcf4690..39b04e99a30e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -95,7 +95,7 @@ static inline void zone_seqlock_init(struct zone *zone) extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); -extern void adjust_present_page_count(struct zone *zone, long nr_pages); +extern void adjust_present_page_count(struct page *page, long nr_pages); /* VM interface that may be used by firmware interface */ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, struct zone *zone); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ee3a86830519..1c0e3bf42521 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -540,6 +540,10 @@ struct zone { * is calculated as: * present_pages = spanned_pages - absent_pages(pages in holes); * + * present_early_pages is present pages existing within the zone + * located on memory available since early boot, excluding hotplugged + * memory. + * * managed_pages is present pages managed by the buddy system, which * is calculated as (reserved_pages includes pages allocated by the * bootmem allocator): @@ -572,6 +576,9 @@ struct zone { atomic_long_t managed_pages; unsigned long spanned_pages; unsigned long present_pages; +#if defined(CONFIG_MEMORY_HOTPLUG) + unsigned long present_early_pages; +#endif #ifdef CONFIG_CMA unsigned long cma_pages; #endif diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6ea62efe2a8f..8a99fa6d096c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -724,8 +724,16 @@ struct zone *zone_for_pfn_range(int online_type, int nid, * This function should only be called by memory_block_{online,offline}, * and {online,offline}_pages. */ -void adjust_present_page_count(struct zone *zone, long nr_pages) +void adjust_present_page_count(struct page *page, long nr_pages) { + struct zone *zone = page_zone(page); + + /* + * We only support onlining/offlining/adding/removing of complete + * memory blocks; therefore, either all is either early or hotplugged. + */ + if (early_section(__pfn_to_section(page_to_pfn(page)))) + zone->present_early_pages += nr_pages; zone->present_pages += nr_pages; zone->zone_pgdat->node_present_pages += nr_pages; } @@ -826,7 +834,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z } online_pages_range(pfn, nr_pages); - adjust_present_page_count(zone, nr_pages); + adjust_present_page_count(pfn_to_page(pfn), nr_pages); node_states_set_node(nid, &arg); if (need_zonelists_rebuild) @@ -1697,7 +1705,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) /* removal success */ adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); - adjust_present_page_count(zone, -nr_pages); + adjust_present_page_count(pfn_to_page(start_pfn), -nr_pages); /* reinitialise watermarks and update pcp limits */ init_per_zone_wmark_min(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 79a2fc5b6c6f..9353418892a7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7240,6 +7240,9 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat, zone->zone_start_pfn = 0; zone->spanned_pages = size; zone->present_pages = real_size; +#if defined(CONFIG_MEMORY_HOTPLUG) + zone->present_early_pages = real_size; +#endif totalpages += size; realtotalpages += real_size; -- cgit v1.2.3 From 028fc57a1c361116e3bcebfeba4ca87878baaf4f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:26 -0700 Subject: drivers/base/memory: introduce "memory groups" to logically group memory blocks In our "auto-movable" memory onlining policy, we want to make decisions across memory blocks of a single memory device. Examples of memory devices include ACPI memory devices (in the simplest case a single DIMM) and virtio-mem. For now, we don't have a connection between a single memory block device and the real memory device. Each memory device consists of 1..X memory block devices. Let's logically group memory blocks belonging to the same memory device in "memory groups". Memory groups can span multiple physical ranges and a memory group itself does not contain any information regarding physical ranges, only properties (e.g., "max_pages") necessary for improved memory onlining. Introduce two memory group types: 1) Static memory group: E.g., a single ACPI memory device, consisting of 1..X memory resources. A memory group consists of 1..Y memory blocks. The whole group is added/removed in one go. If any part cannot get offlined, the whole group cannot be removed. 2) Dynamic memory group: E.g., a single virtio-mem device. Memory is dynamically added/removed in a fixed granularity, called a "unit", consisting of 1..X memory blocks. A unit is added/removed in one go. If any part of a unit cannot get offlined, the whole unit cannot be removed. In case of 1) we usually want either all memory managed by ZONE_MOVABLE or none. In case of 2) we usually want to have as many units as possible managed by ZONE_MOVABLE. We want a single unit to be of the same type. For now, memory groups are an internal concept that is not exposed to user space; we might want to change that in the future, though. add_memory() users can specify a mgid instead of a nid when passing the MHP_NID_IS_MGID flag. Link: https://lkml.kernel.org/r/20210806124715.17090-4-david@redhat.com Signed-off-by: David Hildenbrand Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 159 +++++++++++++++++++++++++++++++++++++++-- include/linux/memory.h | 46 +++++++++++- include/linux/memory_hotplug.h | 5 ++ mm/memory_hotplug.c | 11 ++- 4 files changed, 215 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 86ec2dc82fc2..16f5a3610229 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -82,6 +82,11 @@ static struct bus_type memory_subsys = { */ static DEFINE_XARRAY(memory_blocks); +/* + * Memory groups, indexed by memory group id (mgid). + */ +static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC); + static BLOCKING_NOTIFIER_HEAD(memory_chain); int register_memory_notifier(struct notifier_block *nb) @@ -634,7 +639,8 @@ int register_memory(struct memory_block *memory) } static int init_memory_block(unsigned long block_id, unsigned long state, - unsigned long nr_vmemmap_pages) + unsigned long nr_vmemmap_pages, + struct memory_group *group) { struct memory_block *mem; int ret = 0; @@ -652,6 +658,12 @@ static int init_memory_block(unsigned long block_id, unsigned long state, mem->state = state; mem->nid = NUMA_NO_NODE; mem->nr_vmemmap_pages = nr_vmemmap_pages; + INIT_LIST_HEAD(&mem->group_next); + + if (group) { + mem->group = group; + list_add(&mem->group_next, &group->memory_blocks); + } ret = register_memory(mem); @@ -671,7 +683,7 @@ static int add_memory_block(unsigned long base_section_nr) if (section_count == 0) return 0; return init_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, 0); + MEM_ONLINE, 0, NULL); } static void unregister_memory(struct memory_block *memory) @@ -681,6 +693,11 @@ static void unregister_memory(struct memory_block *memory) WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL); + if (memory->group) { + list_del(&memory->group_next); + memory->group = NULL; + } + /* drop the ref. we got via find_memory_block() */ put_device(&memory->dev); device_unregister(&memory->dev); @@ -694,7 +711,8 @@ static void unregister_memory(struct memory_block *memory) * Called under device_hotplug_lock. */ int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages) + unsigned long vmemmap_pages, + struct memory_group *group) { const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); @@ -707,7 +725,8 @@ int create_memory_block_devices(unsigned long start, unsigned long size, return -EINVAL; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages); + ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages, + group); if (ret) break; } @@ -891,3 +910,135 @@ int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) return bus_for_each_dev(&memory_subsys, NULL, &cb_data, for_each_memory_block_cb); } + +/* + * This is an internal helper to unify allocation and initialization of + * memory groups. Note that the passed memory group will be copied to a + * dynamically allocated memory group. After this call, the passed + * memory group should no longer be used. + */ +static int memory_group_register(struct memory_group group) +{ + struct memory_group *new_group; + uint32_t mgid; + int ret; + + if (!node_possible(group.nid)) + return -EINVAL; + + new_group = kzalloc(sizeof(group), GFP_KERNEL); + if (!new_group) + return -ENOMEM; + *new_group = group; + INIT_LIST_HEAD(&new_group->memory_blocks); + + ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b, + GFP_KERNEL); + if (ret) { + kfree(new_group); + return ret; + } + return mgid; +} + +/** + * memory_group_register_static() - Register a static memory group. + * @nid: The node id. + * @max_pages: The maximum number of pages we'll have in this static memory + * group. + * + * Register a new static memory group and return the memory group id. + * All memory in the group belongs to a single unit, such as a DIMM. All + * memory belonging to a static memory group is added in one go to be removed + * in one go -- it's static. + * + * Returns an error if out of memory, if the node id is invalid, if no new + * memory groups can be registered, or if max_pages is invalid (0). Otherwise, + * returns the new memory group id. + */ +int memory_group_register_static(int nid, unsigned long max_pages) +{ + struct memory_group group = { + .nid = nid, + .s = { + .max_pages = max_pages, + }, + }; + + if (!max_pages) + return -EINVAL; + return memory_group_register(group); +} +EXPORT_SYMBOL_GPL(memory_group_register_static); + +/** + * memory_group_register_dynamic() - Register a dynamic memory group. + * @nid: The node id. + * @unit_pages: Unit in pages in which is memory added/removed in this dynamic + * memory group. + * + * Register a new dynamic memory group and return the memory group id. + * Memory within a dynamic memory group is added/removed dynamically + * in unit_pages. + * + * Returns an error if out of memory, if the node id is invalid, if no new + * memory groups can be registered, or if unit_pages is invalid (0, not a + * power of two, smaller than a single memory block). Otherwise, returns the + * new memory group id. + */ +int memory_group_register_dynamic(int nid, unsigned long unit_pages) +{ + struct memory_group group = { + .nid = nid, + .is_dynamic = true, + .d = { + .unit_pages = unit_pages, + }, + }; + + if (!unit_pages || !is_power_of_2(unit_pages) || + unit_pages < PHYS_PFN(memory_block_size_bytes())) + return -EINVAL; + return memory_group_register(group); +} +EXPORT_SYMBOL_GPL(memory_group_register_dynamic); + +/** + * memory_group_unregister() - Unregister a memory group. + * @mgid: the memory group id + * + * Unregister a memory group. If any memory block still belongs to this + * memory group, unregistering will fail. + * + * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some + * memory blocks still belong to this memory group and returns 0 if + * unregistering succeeded. + */ +int memory_group_unregister(int mgid) +{ + struct memory_group *group; + + if (mgid < 0) + return -EINVAL; + + group = xa_load(&memory_groups, mgid); + if (!group) + return -EINVAL; + if (!list_empty(&group->memory_blocks)) + return -EBUSY; + xa_erase(&memory_groups, mgid); + kfree(group); + return 0; +} +EXPORT_SYMBOL_GPL(memory_group_unregister); + +/* + * This is an internal helper only to be used in core memory hotplug code to + * lookup a memory group. We don't care about locking, as we don't expect a + * memory group to get unregistered while adding memory to it -- because + * the group and the memory is managed by the same driver. + */ +struct memory_group *memory_group_find_by_id(int mgid) +{ + return xa_load(&memory_groups, mgid); +} diff --git a/include/linux/memory.h b/include/linux/memory.h index 97e92e8b556a..d505c12c5c77 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -23,6 +23,42 @@ #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) +/** + * struct memory_group - a logical group of memory blocks + * @nid: The node id for all memory blocks inside the memory group. + * @blocks: List of all memory blocks belonging to this memory group. + * @is_dynamic: The memory group type: static vs. dynamic + * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum + * number of pages we'll have in this static memory group. + * @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages + * in which memory is added/removed in this dynamic memory group. + * This granularity defines the alignment of a unit in physical + * address space; it has to be at least as big as a single + * memory block. + * + * A memory group logically groups memory blocks; each memory block + * belongs to at most one memory group. A memory group corresponds to + * a memory device, such as a DIMM or a NUMA node, which spans multiple + * memory blocks and might even span multiple non-contiguous physical memory + * ranges. + * + * Modification of members after registration is serialized by memory + * hot(un)plug code. + */ +struct memory_group { + int nid; + struct list_head memory_blocks; + bool is_dynamic; + union { + struct { + unsigned long max_pages; + } s; + struct { + unsigned long unit_pages; + } d; + }; +}; + struct memory_block { unsigned long start_section_nr; unsigned long state; /* serialized by the dev->lock */ @@ -34,6 +70,8 @@ struct memory_block { * lay at the beginning of the memory block. */ unsigned long nr_vmemmap_pages; + struct memory_group *group; /* group (if any) for this block */ + struct list_head group_next; /* next block inside memory group */ }; int arch_get_memory_phys_device(unsigned long start_pfn); @@ -86,7 +124,8 @@ static inline int memory_notify(unsigned long val, void *v) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages); + unsigned long vmemmap_pages, + struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); @@ -96,6 +135,11 @@ extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<nid; + } + if (!node_possible(nid)) { WARN(1, "node %d was absent from the node_possible_map\n", nid); return -EINVAL; @@ -1303,7 +1311,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) goto error; /* create memory block devices after memory was added */ - ret = create_memory_block_devices(start, size, mhp_altmap.alloc); + ret = create_memory_block_devices(start, size, mhp_altmap.alloc, + group); if (ret) { arch_remove_memory(start, size, NULL); goto error; -- cgit v1.2.3 From 836809ec75cc07c6d07c43036e3844affbe0d46f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:30 -0700 Subject: mm/memory_hotplug: track present pages in memory groups Let's track all present pages in each memory group. Especially, track memory present in ZONE_MOVABLE and memory present in one of the kernel zones (which really only is ZONE_NORMAL right now as memory groups only apply to hotplugged memory) separately within a memory group, to prepare for making smart auto-online decision for individual memory blocks within a memory group based on group statistics. Link: https://lkml.kernel.org/r/20210806124715.17090-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 10 +++++----- include/linux/memory.h | 6 ++++++ include/linux/memory_hotplug.h | 13 +++++++++---- mm/memory_hotplug.c | 19 ++++++++++++++----- 4 files changed, 34 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 16f5a3610229..a1082013e10c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -198,7 +198,7 @@ static int memory_block_online(struct memory_block *mem) } ret = online_pages(start_pfn + nr_vmemmap_pages, - nr_pages - nr_vmemmap_pages, zone); + nr_pages - nr_vmemmap_pages, zone, mem->group); if (ret) { if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); @@ -210,7 +210,7 @@ static int memory_block_online(struct memory_block *mem) * now already properly populated. */ if (nr_vmemmap_pages) - adjust_present_page_count(pfn_to_page(start_pfn), + adjust_present_page_count(pfn_to_page(start_pfn), mem->group, nr_vmemmap_pages); return ret; @@ -228,16 +228,16 @@ static int memory_block_offline(struct memory_block *mem) * can properly be torn down in offline_pages(). */ if (nr_vmemmap_pages) - adjust_present_page_count(pfn_to_page(start_pfn), + adjust_present_page_count(pfn_to_page(start_pfn), mem->group, -nr_vmemmap_pages); ret = offline_pages(start_pfn + nr_vmemmap_pages, - nr_pages - nr_vmemmap_pages); + nr_pages - nr_vmemmap_pages, mem->group); if (ret) { /* offline_pages() failed. Account back. */ if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), - nr_vmemmap_pages); + mem->group, nr_vmemmap_pages); return ret; } diff --git a/include/linux/memory.h b/include/linux/memory.h index d505c12c5c77..6ffdc1db385f 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -27,6 +27,10 @@ * struct memory_group - a logical group of memory blocks * @nid: The node id for all memory blocks inside the memory group. * @blocks: List of all memory blocks belonging to this memory group. + * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this + * memory group. + * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this + * memory group. * @is_dynamic: The memory group type: static vs. dynamic * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum * number of pages we'll have in this static memory group. @@ -48,6 +52,8 @@ struct memory_group { int nid; struct list_head memory_blocks; + unsigned long present_kernel_pages; + unsigned long present_movable_pages; bool is_dynamic; union { struct { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 5d341978b4bc..cf3f423c8a74 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -12,6 +12,7 @@ struct zone; struct pglist_data; struct mem_section; struct memory_block; +struct memory_group; struct resource; struct vmem_altmap; @@ -100,13 +101,15 @@ static inline void zone_seqlock_init(struct zone *zone) extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); -extern void adjust_present_page_count(struct page *page, long nr_pages); +extern void adjust_present_page_count(struct page *page, + struct memory_group *group, + long nr_pages); /* VM interface that may be used by firmware interface */ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, struct zone *zone); extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, - struct zone *zone); + struct zone *zone, struct memory_group *group); extern struct zone *test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn); extern void __offline_isolated_pages(unsigned long start_pfn, @@ -296,7 +299,8 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {} #ifdef CONFIG_MEMORY_HOTREMOVE extern void try_offline_node(int nid); -extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); +extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages, + struct memory_group *group); extern int remove_memory(u64 start, u64 size); extern void __remove_memory(u64 start, u64 size); extern int offline_and_remove_memory(u64 start, u64 size); @@ -304,7 +308,8 @@ extern int offline_and_remove_memory(u64 start, u64 size); #else static inline void try_offline_node(int nid) {} -static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) +static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages, + struct memory_group *group) { return -EINVAL; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fd57a296dd27..8199a4f98b2b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -915,9 +915,11 @@ struct zone *zone_for_pfn_range(int online_type, int nid, * This function should only be called by memory_block_{online,offline}, * and {online,offline}_pages. */ -void adjust_present_page_count(struct page *page, long nr_pages) +void adjust_present_page_count(struct page *page, struct memory_group *group, + long nr_pages) { struct zone *zone = page_zone(page); + const bool movable = zone_idx(zone) == ZONE_MOVABLE; /* * We only support onlining/offlining/adding/removing of complete @@ -927,6 +929,11 @@ void adjust_present_page_count(struct page *page, long nr_pages) zone->present_early_pages += nr_pages; zone->present_pages += nr_pages; zone->zone_pgdat->node_present_pages += nr_pages; + + if (group && movable) + group->present_movable_pages += nr_pages; + else if (group && !movable) + group->present_kernel_pages += nr_pages; } int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, @@ -972,7 +979,8 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages) kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages)); } -int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone) +int __ref online_pages(unsigned long pfn, unsigned long nr_pages, + struct zone *zone, struct memory_group *group) { unsigned long flags; int need_zonelists_rebuild = 0; @@ -1025,7 +1033,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z } online_pages_range(pfn, nr_pages); - adjust_present_page_count(pfn_to_page(pfn), nr_pages); + adjust_present_page_count(pfn_to_page(pfn), group, nr_pages); node_states_set_node(nid, &arg); if (need_zonelists_rebuild) @@ -1769,7 +1777,8 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, return 0; } -int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) +int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, + struct memory_group *group) { const unsigned long end_pfn = start_pfn + nr_pages; unsigned long pfn, system_ram_pages = 0; @@ -1905,7 +1914,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) /* removal success */ adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); - adjust_present_page_count(pfn_to_page(start_pfn), -nr_pages); + adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages); /* reinitialise watermarks and update pcp limits */ init_per_zone_wmark_min(); -- cgit v1.2.3 From 2a1578397a16fc18677c0c434db792182ba551ed Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:34 -0700 Subject: ACPI: memhotplug: use a single static memory group for a single memory device Let's group all memory we add for a single memory device - we want a single node for that (which also seems to be the sane thing to do). We won't care for now about memory that was already added to the system (e.g., via e820) -- usually *all* memory of a memory device was already added and we'll fail acpi_memory_enable_device(). Link: https://lkml.kernel.org/r/20210806124715.17090-6-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Rafael J. Wysocki Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index eb4faf7c5cad..24f662d8bd39 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -54,6 +54,7 @@ struct acpi_memory_info { struct acpi_memory_device { struct acpi_device *device; struct list_head res_list; + int mgid; }; static acpi_status @@ -169,12 +170,33 @@ static void acpi_unbind_memory_blocks(struct acpi_memory_info *info) static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) { acpi_handle handle = mem_device->device->handle; + mhp_t mhp_flags = MHP_NID_IS_MGID; int result, num_enabled = 0; struct acpi_memory_info *info; - mhp_t mhp_flags = MHP_NONE; - int node; + u64 total_length = 0; + int node, mgid; node = acpi_get_node(handle); + + list_for_each_entry(info, &mem_device->res_list, list) { + if (!info->length) + continue; + /* We want a single node for the whole memory group */ + if (node < 0) + node = memory_add_physaddr_to_nid(info->start_addr); + total_length += info->length; + } + + if (!total_length) { + dev_err(&mem_device->device->dev, "device is empty\n"); + return -EINVAL; + } + + mgid = memory_group_register_static(node, PFN_UP(total_length)); + if (mgid < 0) + return mgid; + mem_device->mgid = mgid; + /* * Tell the VM there is more memory here... * Note: Assume that this function returns zero on success @@ -188,12 +210,10 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) */ if (!info->length) continue; - if (node < 0) - node = memory_add_physaddr_to_nid(info->start_addr); if (mhp_supports_memmap_on_memory(info->length)) mhp_flags |= MHP_MEMMAP_ON_MEMORY; - result = __add_memory(node, info->start_addr, info->length, + result = __add_memory(mgid, info->start_addr, info->length, mhp_flags); /* @@ -253,6 +273,10 @@ static void acpi_memory_device_free(struct acpi_memory_device *mem_device) if (!mem_device) return; + /* In case we succeeded adding *some* memory, unregistering fails. */ + if (mem_device->mgid >= 0) + memory_group_unregister(mem_device->mgid); + acpi_memory_free_device_resources(mem_device); mem_device->device->driver_data = NULL; kfree(mem_device); @@ -273,6 +297,7 @@ static int acpi_memory_device_add(struct acpi_device *device, INIT_LIST_HEAD(&mem_device->res_list); mem_device->device = device; + mem_device->mgid = -1; sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS); device->driver_data = mem_device; -- cgit v1.2.3 From eedf634aac3b85b70e7b139c32fc68f565ecf815 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:37 -0700 Subject: dax/kmem: use a single static memory group for a single probed unit Although dax/kmem users often disable auto-onlining and instead online memory manually (usually to ZONE_MOVABLE), there is still value in having auto-onlining be aware of the relationship of memory blocks. Let's treat one probed unit as a single static memory device, similar to a single ACPI memory device. Link: https://lkml.kernel.org/r/20210806124715.17090-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dax/kmem.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 99e0f60c4c26..a37622060fff 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -37,15 +37,16 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) struct dax_kmem_data { const char *res_name; + int mgid; struct resource *res[]; }; static int dev_dax_kmem_probe(struct dev_dax *dev_dax) { struct device *dev = &dev_dax->dev; + unsigned long total_len = 0; struct dax_kmem_data *data; - int rc = -ENOMEM; - int i, mapped = 0; + int i, rc, mapped = 0; int numa_node; /* @@ -61,24 +62,44 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) return -EINVAL; } + for (i = 0; i < dev_dax->nr_range; i++) { + struct range range; + + rc = dax_kmem_range(dev_dax, i, &range); + if (rc) { + dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", + i, range.start, range.end); + continue; + } + total_len += range_len(&range); + } + + if (!total_len) { + dev_warn(dev, "rejecting DAX region without any memory after alignment\n"); + return -EINVAL; + } + data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL); if (!data) return -ENOMEM; + rc = -ENOMEM; data->res_name = kstrdup(dev_name(dev), GFP_KERNEL); if (!data->res_name) goto err_res_name; + rc = memory_group_register_static(numa_node, total_len); + if (rc < 0) + goto err_reg_mgid; + data->mgid = rc; + for (i = 0; i < dev_dax->nr_range; i++) { struct resource *res; struct range range; rc = dax_kmem_range(dev_dax, i, &range); - if (rc) { - dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", - i, range.start, range.end); + if (rc) continue; - } /* Region is permanently reserved if hotremove fails. */ res = request_mem_region(range.start, range_len(&range), data->res_name); @@ -108,8 +129,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) * Ensure that future kexec'd kernels will not treat * this as RAM automatically. */ - rc = add_memory_driver_managed(numa_node, range.start, - range_len(&range), kmem_name, MHP_NONE); + rc = add_memory_driver_managed(data->mgid, range.start, + range_len(&range), kmem_name, MHP_NID_IS_MGID); if (rc) { dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", @@ -129,6 +150,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) return 0; err_request_mem: + memory_group_unregister(data->mgid); +err_reg_mgid: kfree(data->res_name); err_res_name: kfree(data); @@ -171,6 +194,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) } if (success >= dev_dax->nr_range) { + memory_group_unregister(data->mgid); kfree(data->res_name); kfree(data); dev_set_drvdata(dev, NULL); -- cgit v1.2.3 From ffaa6ce835eaf0fe3eb05ff931de3c1134b07f35 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:41 -0700 Subject: virtio-mem: use a single dynamic memory group for a single virtio-mem device Let's use a single dynamic memory group. Link: https://lkml.kernel.org/r/20210806124715.17090-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_mem.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 7e83ed373e00..bef8ad6bf466 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -143,6 +143,8 @@ struct virtio_mem { * add_memory_driver_managed(). */ const char *resource_name; + /* Memory group identification. */ + int mgid; /* * We don't want to add too much memory if it's not getting onlined, @@ -626,8 +628,8 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, addr + size - 1); /* Memory might get onlined immediately. */ atomic64_add(size, &vm->offline_size); - rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name, - MHP_MERGE_RESOURCE); + rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, + MHP_MERGE_RESOURCE | MHP_NID_IS_MGID); if (rc) { atomic64_sub(size, &vm->offline_size); dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); @@ -2569,6 +2571,7 @@ static bool virtio_mem_has_memory_added(struct virtio_mem *vm) static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; + uint64_t unit_pages; int rc; BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); @@ -2603,6 +2606,16 @@ static int virtio_mem_probe(struct virtio_device *vdev) if (rc) goto out_del_vq; + /* use a single dynamic memory group to cover the whole memory device */ + if (vm->in_sbm) + unit_pages = PHYS_PFN(memory_block_size_bytes()); + else + unit_pages = PHYS_PFN(vm->bbm.bb_size); + rc = memory_group_register_dynamic(vm->nid, unit_pages); + if (rc < 0) + goto out_del_resource; + vm->mgid = rc; + /* * If we still have memory plugged, we have to unplug all memory first. * Registering our parent resource makes sure that this memory isn't @@ -2617,7 +2630,7 @@ static int virtio_mem_probe(struct virtio_device *vdev) vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; rc = register_memory_notifier(&vm->memory_notifier); if (rc) - goto out_del_resource; + goto out_unreg_group; rc = register_virtio_mem_device(vm); if (rc) goto out_unreg_mem; @@ -2631,6 +2644,8 @@ static int virtio_mem_probe(struct virtio_device *vdev) return 0; out_unreg_mem: unregister_memory_notifier(&vm->memory_notifier); +out_unreg_group: + memory_group_unregister(vm->mgid); out_del_resource: virtio_mem_delete_resource(vm); out_del_vq: @@ -2695,6 +2710,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) } else { virtio_mem_delete_resource(vm); kfree_const(vm->resource_name); + memory_group_unregister(vm->mgid); } /* remove all tracking data - no locking needed */ -- cgit v1.2.3 From 445fcf7c721450dd1d4ec6c217b3c6a932602a44 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:45 -0700 Subject: mm/memory_hotplug: memory group aware "auto-movable" online policy Use memory groups to improve our "auto-movable" onlining policy: 1. For static memory groups (e.g., a DIMM), online a memory block MOVABLE only if all other memory blocks in the group are either MOVABLE or could be onlined MOVABLE. A DIMM will either be MOVABLE or not, not a mixture. 2. For dynamic memory groups (e.g., a virtio-mem device), online a memory block MOVABLE only if all other memory blocks inside the current unit are either MOVABLE or could be onlined MOVABLE. For a virtio-mem device with a device block size with 512 MiB, all 128 MiB memory blocks wihin a 512 MiB unit will either be MOVABLE or not, not a mixture. We have to pass the memory group to zone_for_pfn_range() to take the memory group into account. Note: for now, there seems to be no compelling reason to make this behavior configurable. Link: https://lkml.kernel.org/r/20210806124715.17090-9-david@redhat.com Signed-off-by: David Hildenbrand Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 18 +++++++++------- include/linux/memory_hotplug.h | 3 ++- mm/memory_hotplug.c | 48 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 57 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index a1082013e10c..b699ddc42693 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -182,7 +182,8 @@ static int memory_block_online(struct memory_block *mem) struct zone *zone; int ret; - zone = zone_for_pfn_range(mem->online_type, mem->nid, start_pfn, nr_pages); + zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group, + start_pfn, nr_pages); /* * Although vmemmap pages have a different lifecycle than the pages @@ -379,12 +380,13 @@ static ssize_t phys_device_show(struct device *dev, #ifdef CONFIG_MEMORY_HOTREMOVE static int print_allowed_zone(char *buf, int len, int nid, + struct memory_group *group, unsigned long start_pfn, unsigned long nr_pages, int online_type, struct zone *default_zone) { struct zone *zone; - zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); + zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages); if (zone == default_zone) return 0; @@ -397,9 +399,10 @@ static ssize_t valid_zones_show(struct device *dev, struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; + struct memory_group *group = mem->group; struct zone *default_zone; + int nid = mem->nid; int len = 0; - int nid; /* * Check the existing zone. Make sure that we do that only on the @@ -418,14 +421,13 @@ static ssize_t valid_zones_show(struct device *dev, goto out; } - nid = mem->nid; - default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn, - nr_pages); + default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group, + start_pfn, nr_pages); len += sysfs_emit_at(buf, len, "%s", default_zone->name); - len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages, + len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, default_zone); - len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages, + len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, default_zone); out: len += sysfs_emit_at(buf, len, "\n"); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index cf3f423c8a74..e5a867c950b2 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -349,7 +349,8 @@ extern void sparse_remove_section(struct mem_section *ms, extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern struct zone *zone_for_pfn_range(int online_type, int nid, - unsigned long start_pfn, unsigned long nr_pages); + struct memory_group *group, unsigned long start_pfn, + unsigned long nr_pages); extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 8199a4f98b2b..248e2ba4ac59 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -852,12 +852,53 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn * "present pages" is an upper limit that can get reached at runtime. As * we base our calculations on KERNEL_EARLY, this is not an issue. */ -static struct zone *auto_movable_zone_for_pfn(int nid, unsigned long pfn, +static struct zone *auto_movable_zone_for_pfn(int nid, + struct memory_group *group, + unsigned long pfn, unsigned long nr_pages) { + unsigned long online_pages = 0, max_pages, end_pfn; + struct page *page; + if (!auto_movable_ratio) goto kernel_zone; + if (group && !group->is_dynamic) { + max_pages = group->s.max_pages; + online_pages = group->present_movable_pages; + + /* If anything is !MOVABLE online the rest !MOVABLE. */ + if (group->present_kernel_pages) + goto kernel_zone; + } else if (!group || group->d.unit_pages == nr_pages) { + max_pages = nr_pages; + } else { + max_pages = group->d.unit_pages; + /* + * Take a look at all online sections in the current unit. + * We can safely assume that all pages within a section belong + * to the same zone, because dynamic memory groups only deal + * with hotplugged memory. + */ + pfn = ALIGN_DOWN(pfn, group->d.unit_pages); + end_pfn = pfn + group->d.unit_pages; + for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + /* If anything is !MOVABLE online the rest !MOVABLE. */ + if (page_zonenum(page) != ZONE_MOVABLE) + goto kernel_zone; + online_pages += PAGES_PER_SECTION; + } + } + + /* + * Online MOVABLE if we could *currently* online all remaining parts + * MOVABLE. We expect to (add+) online them immediately next, so if + * nobody interferes, all will be MOVABLE if possible. + */ + nr_pages = max_pages - online_pages; if (!auto_movable_can_online_movable(NUMA_NO_NODE, nr_pages)) goto kernel_zone; @@ -897,7 +938,8 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn } struct zone *zone_for_pfn_range(int online_type, int nid, - unsigned long start_pfn, unsigned long nr_pages) + struct memory_group *group, unsigned long start_pfn, + unsigned long nr_pages) { if (online_type == MMOP_ONLINE_KERNEL) return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages); @@ -906,7 +948,7 @@ struct zone *zone_for_pfn_range(int online_type, int nid, return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE]; if (online_policy == ONLINE_POLICY_AUTO_MOVABLE) - return auto_movable_zone_for_pfn(nid, start_pfn, nr_pages); + return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages); return default_zone_for_pfn(nid, start_pfn, nr_pages); } -- cgit v1.2.3 From 3fcebf90209a7f52d384ad7701425aa91be309ab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:48 -0700 Subject: mm/memory_hotplug: improved dynamic memory group aware "auto-movable" online policy Currently, the "auto-movable" online policy does not allow for hotplugged KERNEL (ZONE_NORMAL) memory to increase the amount of MOVABLE memory we can have, primarily, because there is no coordiantion across memory devices and we don't want to create zone-imbalances accidentially when unplugging memory. However, within a single memory device it's different. Let's allow for KERNEL memory within a dynamic memory group to allow for more MOVABLE within the same memory group. The only thing we have to take care of is that the managing driver avoids zone imbalances by unplugging MOVABLE memory first, otherwise there can be corner cases where unplug of memory could result in (accidential) zone imbalances. virtio-mem is the only user of dynamic memory groups and recently added support for prioritizing unplug of ZONE_MOVABLE over ZONE_NORMAL, so we don't need a new toggle to enable it for dynamic memory groups. We limit this handling to dynamic memory groups, because: * We want to keep the runtime overhead for collecting stats when onlining a single memory block small. We tend to have only a handful of dynamic memory groups, but we can have quite some static memory groups (e.g., 256 DIMMs). * It doesn't make too much sense for static memory groups, as we try onlining all applicable memory blocks either completely to ZONE_MOVABLE or not. In ordinary operation, we won't have a mixture of zones within a static memory group. When adding memory to a dynamic memory group, we'll first online memory to ZONE_MOVABLE as long as early KERNEL memory allows for it. Then, we'll online the next unit(s) to ZONE_NORMAL, until we can online the next unit(s) to ZONE_MOVABLE. For a simple virtio-mem device with a MOVABLE:KERNEL ratio of 3:1, it will result in a layout like: [M][M][M][M][M][M][M][M][N][M][M][M][N][M][M][M]... ^ movable memory due to early kernel memory ^ allows for more movable memory ... ^-----^ ... here ^ allows for more movable memory ... ^-----^ ... here While the created layout is sub-optimal when it comes to contiguous zones, it gives us the maximum flexibility when dynamically growing/shrinking a device; we can grow small VMs really big in small steps, and still shrink reliably to e.g., 1/4 of the maximum VM size in this example, removing full memory blocks along with meta data more reliably. Mark dynamic memory groups in the xarray such that we can efficiently iterate over them when collecting stats. In usual setups, we have one virtio-mem device per NUMA node, and usually only a small number of NUMA nodes. Note: for now, there seems to be no compelling reason to make this behavior configurable. Link: https://lkml.kernel.org/r/20210806124715.17090-10-david@redhat.com Signed-off-by: David Hildenbrand Cc: Anshuman Khandual Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Hui Zhu Cc: Jason Wang Cc: Len Brown Cc: Marek Kedzierski Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 30 +++++++++++++++++++++++++ include/linux/memory.h | 3 +++ mm/memory_hotplug.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 89 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index b699ddc42693..440fd656c002 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -86,6 +86,7 @@ static DEFINE_XARRAY(memory_blocks); * Memory groups, indexed by memory group id (mgid). */ static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC); +#define MEMORY_GROUP_MARK_DYNAMIC XA_MARK_1 static BLOCKING_NOTIFIER_HEAD(memory_chain); @@ -939,6 +940,8 @@ static int memory_group_register(struct memory_group group) if (ret) { kfree(new_group); return ret; + } else if (group.is_dynamic) { + xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC); } return mgid; } @@ -1044,3 +1047,30 @@ struct memory_group *memory_group_find_by_id(int mgid) { return xa_load(&memory_groups, mgid); } + +/* + * This is an internal helper only to be used in core memory hotplug code to + * walk all dynamic memory groups excluding a given memory group, either + * belonging to a specific node, or belonging to any node. + */ +int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, + struct memory_group *excluded, void *arg) +{ + struct memory_group *group; + unsigned long index; + int ret = 0; + + xa_for_each_marked(&memory_groups, index, group, + MEMORY_GROUP_MARK_DYNAMIC) { + if (group == excluded) + continue; +#ifdef CONFIG_NUMA + if (nid != NUMA_NO_NODE && group->nid != nid) + continue; +#endif /* CONFIG_NUMA */ + ret = func(group, arg); + if (ret) + break; + } + return ret; +} diff --git a/include/linux/memory.h b/include/linux/memory.h index 6ffdc1db385f..cbcc43ad2b97 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -146,6 +146,9 @@ extern int memory_group_register_static(int nid, unsigned long max_pages); extern int memory_group_register_dynamic(int nid, unsigned long unit_pages); extern int memory_group_unregister(int mgid); struct memory_group *memory_group_find_by_id(int mgid); +typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); +int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, + struct memory_group *excluded, void *arg); #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 248e2ba4ac59..b80fb8164fb8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -752,11 +752,44 @@ static void auto_movable_stats_account_zone(struct auto_movable_stats *stats, #endif /* CONFIG_CMA */ } } +struct auto_movable_group_stats { + unsigned long movable_pages; + unsigned long req_kernel_early_pages; +}; -static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages) +static int auto_movable_stats_account_group(struct memory_group *group, + void *arg) +{ + const int ratio = READ_ONCE(auto_movable_ratio); + struct auto_movable_group_stats *stats = arg; + long pages; + + /* + * We don't support modifying the config while the auto-movable online + * policy is already enabled. Just avoid the division by zero below. + */ + if (!ratio) + return 0; + + /* + * Calculate how many early kernel pages this group requires to + * satisfy the configured zone ratio. + */ + pages = group->present_movable_pages * 100 / ratio; + pages -= group->present_kernel_pages; + + if (pages > 0) + stats->req_kernel_early_pages += pages; + stats->movable_pages += group->present_movable_pages; + return 0; +} + +static bool auto_movable_can_online_movable(int nid, struct memory_group *group, + unsigned long nr_pages) { - struct auto_movable_stats stats = {}; unsigned long kernel_early_pages, movable_pages; + struct auto_movable_group_stats group_stats = {}; + struct auto_movable_stats stats = {}; pg_data_t *pgdat = NODE_DATA(nid); struct zone *zone; int i; @@ -777,6 +810,21 @@ static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages) kernel_early_pages = stats.kernel_early_pages; movable_pages = stats.movable_pages; + /* + * Kernel memory inside dynamic memory group allows for more MOVABLE + * memory within the same group. Remove the effect of all but the + * current group from the stats. + */ + walk_dynamic_memory_groups(nid, auto_movable_stats_account_group, + group, &group_stats); + if (kernel_early_pages <= group_stats.req_kernel_early_pages) + return false; + kernel_early_pages -= group_stats.req_kernel_early_pages; + movable_pages -= group_stats.movable_pages; + + if (group && group->is_dynamic) + kernel_early_pages += group->present_kernel_pages; + /* * Test if we could online the given number of pages to ZONE_MOVABLE * and still stay in the configured ratio. @@ -834,6 +882,10 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn * with unmovable allocations). While there are corner cases where it might * still work, it is barely relevant in practice. * + * Exceptions are dynamic memory groups, which allow for more MOVABLE + * memory within the same memory group -- because in that case, there is + * coordination within the single memory device managed by a single driver. + * * We rely on "present pages" instead of "managed pages", as the latter is * highly unreliable and dynamic in virtualized environments, and does not * consider boot time allocations. For example, memory ballooning adjusts the @@ -899,12 +951,12 @@ static struct zone *auto_movable_zone_for_pfn(int nid, * nobody interferes, all will be MOVABLE if possible. */ nr_pages = max_pages - online_pages; - if (!auto_movable_can_online_movable(NUMA_NO_NODE, nr_pages)) + if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages)) goto kernel_zone; #ifdef CONFIG_NUMA if (auto_movable_numa_aware && - !auto_movable_can_online_movable(nid, nr_pages)) + !auto_movable_can_online_movable(nid, group, nr_pages)) goto kernel_zone; #endif /* CONFIG_NUMA */ -- cgit v1.2.3 From 73b718c617caae17f6e2964fba823e2c18a9b67c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:57:51 -0700 Subject: thermal/drivers/devfreq_cooling: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. The new macro uses a unsigned long type which is already the type in the current code via the 'freq' variable. Link: https://lkml.kernel.org/r/20210816114732.1834145-4-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Andy Shevchenko Reviewed-by: Christian Eggers Cc: Chanwoo Choi Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/thermal/devfreq_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 5a86cffd78f6..4310cb342a9f 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -18,10 +18,10 @@ #include #include #include +#include #include -#define HZ_PER_KHZ 1000 #define SCALE_ERROR_MITIGATION 100 /** -- cgit v1.2.3 From 04c8984ae3fa4373948483e367561c79fda7f571 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:57:54 -0700 Subject: devfreq: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. The new macro has an unsigned long type. All the code is dealing with unsigned long and the code using the macro is doing a coercitive cast to unsigned long. Link: https://lkml.kernel.org/r/20210816114732.1834145-5-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Christian Eggers Reviewed-by: Andy Shevchenko Acked-by: Chanwoo Choi Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 28f3e0ba6cdd..85faa7a5c7d1 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "governor.h" #define CREATE_TRACE_POINTS @@ -34,7 +35,6 @@ #define IS_SUPPORTED_FLAG(f, name) ((f & DEVFREQ_GOV_FLAG_##name) ? true : false) #define IS_SUPPORTED_ATTR(f, name) ((f & DEVFREQ_GOV_ATTR_##name) ? true : false) -#define HZ_PER_KHZ 1000 static struct class *devfreq_class; static struct dentry *devfreq_debugfs; -- cgit v1.2.3 From 55c653e0be71acb70f0dd2235b8c7aabebe3ed98 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:57:58 -0700 Subject: iio/drivers/as73211: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. Link: https://lkml.kernel.org/r/20210816114732.1834145-6-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Christian Eggers Reviewed-by: Andy Shevchenko Acked-by: Jonathan Cameron Cc: Chanwoo Choi Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/iio/light/as73211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iio/light/as73211.c b/drivers/iio/light/as73211.c index 7b32dfaee9b3..3ba2378df3dd 100644 --- a/drivers/iio/light/as73211.c +++ b/drivers/iio/light/as73211.c @@ -24,8 +24,7 @@ #include #include #include - -#define HZ_PER_KHZ 1000 +#include #define AS73211_DRV_NAME "as73211" -- cgit v1.2.3 From d59eacaac953242853c2e53db0ee7cc25613ae03 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:58:01 -0700 Subject: hwmon/drivers/mr75203: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. The new macro is an unsigned long. The code dealing with it is considering as an unsigned long also. Link: https://lkml.kernel.org/r/20210816114732.1834145-7-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Christian Eggers Reviewed-by: Andy Shevchenko Acked-by: Guenter Roeck Cc: Chanwoo Choi Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/mr75203.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c index 18da5a25e89a..868243dba1ee 100644 --- a/drivers/hwmon/mr75203.c +++ b/drivers/hwmon/mr75203.c @@ -17,6 +17,7 @@ #include #include #include +#include /* PVT Common register */ #define PVT_IP_CONFIG 0x04 @@ -37,7 +38,6 @@ #define CLK_SYNTH_EN BIT(24) #define CLK_SYS_CYCLES_MAX 514 #define CLK_SYS_CYCLES_MIN 2 -#define HZ_PER_MHZ 1000000L #define SDIF_DISABLE 0x04 -- cgit v1.2.3 From 87000e7fe0a201572fd8756e3497c0b5ee4f4231 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:58:05 -0700 Subject: iio/drivers/hid-sensor: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. Link: https://lkml.kernel.org/r/20210816114732.1834145-8-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Andy Shevchenko Acked-by: Jonathan Cameron Cc: Chanwoo Choi Cc: Christian Eggers Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/iio/common/hid-sensors/hid-sensor-attributes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index 043f199e7bc6..9b279937a24e 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -6,12 +6,11 @@ #include #include #include +#include #include #include -#define HZ_PER_MHZ 1000000L - static struct { u32 usage_id; int unit; /* 0 for default others from HID sensor spec */ -- cgit v1.2.3 From 09704a941c42296c34955651d7bcc860635f8a59 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:58:08 -0700 Subject: i2c/drivers/ov02q10: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. Link: https://lkml.kernel.org/r/20210816114732.1834145-9-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Andy Shevchenko Cc: Chanwoo Choi Cc: Christian Eggers Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/i2c/ov02a10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/media/i2c/ov02a10.c b/drivers/media/i2c/ov02a10.c index a3ce5500d355..0f08c05333ea 100644 --- a/drivers/media/i2c/ov02a10.c +++ b/drivers/media/i2c/ov02a10.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,6 @@ /* Test pattern control */ #define OV02A10_REG_TEST_PATTERN 0xb6 -#define HZ_PER_MHZ 1000000L #define OV02A10_LINK_FREQ_390MHZ (390 * HZ_PER_MHZ) #define OV02A10_ECLK_FREQ (24 * HZ_PER_MHZ) -- cgit v1.2.3 From 9ef347c3df98ac69d04ff2d959cb5a3c7d4367d0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:58:11 -0700 Subject: mtd/drivers/nand: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. Link: https://lkml.kernel.org/r/20210816114732.1834145-10-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Acked-by: Miquel Raynal Reviewed-by: Andy Shevchenko Cc: Chanwoo Choi Cc: Christian Eggers Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mtd/nand/raw/intel-nand-controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c index 8b49fd56cf96..709f0402fbae 100644 --- a/drivers/mtd/nand/raw/intel-nand-controller.c +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #define EBU_CLC 0x000 @@ -102,7 +103,6 @@ #define MAX_CS 2 -#define HZ_PER_MHZ 1000000L #define USEC_PER_SEC 1000000L struct ebu_nand_cs { -- cgit v1.2.3 From 18821693b97bd5601d796a41ecc7da9cc2766769 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:58:15 -0700 Subject: phy/drivers/stm32: use HZ macros HZ unit conversion macros are available in units.h, use them and remove the duplicate definition. Link: https://lkml.kernel.org/r/20210816114732.1834145-11-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Andy Shevchenko Cc: Chanwoo Choi Cc: Christian Eggers Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/phy/st/phy-stm32-usbphyc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 3e491dfb2525..937a14fa7448 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -15,6 +15,7 @@ #include #include #include +#include #define STM32_USBPHYC_PLL 0x0 #define STM32_USBPHYC_MISC 0x8 @@ -47,7 +48,6 @@ #define PLL_FVCO_MHZ 2880 #define PLL_INFF_MIN_RATE_HZ 19200000 #define PLL_INFF_MAX_RATE_HZ 38400000 -#define HZ_PER_MHZ 1000000L struct pll_params { u8 ndiv; -- cgit v1.2.3