From d622abf74f3d81365e41c3bfdbbda50ecd99ba3d Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Jun 2020 15:56:53 -0700 Subject: mm: memblock: replace dereferences of memblock_region.nid with API calls Patch series "mm: rework free_area_init*() funcitons". After the discussion [1] about removal of CONFIG_NODES_SPAN_OTHER_NODES and CONFIG_HAVE_MEMBLOCK_NODE_MAP options, I took it a bit further and updated the node/zone initialization. Since all architectures have memblock, it is possible to use only the newer version of free_area_init_node() that calculates the zone and node boundaries based on memblock node mapping and architectural limits on possible zone PFNs. The architectures that still determined zone and hole sizes can be switched to the generic code and the old code that took those zone and hole sizes can be simply removed. And, since it all started from the removal of CONFIG_NODES_SPAN_OTHER_NODES, the memmap_init() is now updated to iterate over memblocks and so it does not need to perform early_pfn_to_nid() query for every PFN. [1] https://lore.kernel.org/lkml/1585420282-25630-1-git-send-email-Hoan@os.amperecomputing.com This patch (of 21): There are several places in the code that directly dereference memblock_region.nid despite this field being defined only when CONFIG_HAVE_MEMBLOCK_NODE_MAP=y. Replace these with calls to memblock_get_region_nid() to improve code robustness and to avoid possible breakage when CONFIG_HAVE_MEMBLOCK_NODE_MAP will be removed. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Tested-by: Hoan Tran [arm64] Reviewed-by: Baoquan He Cc: Brian Cain Cc: Catalin Marinas Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guan Xuetao Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Mike Rapoport Cc: Nick Hu Cc: Paul Walmsley Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200412194859.12663-1-rppt@kernel.org Link: http://lkml.kernel.org/r/20200412194859.12663-2-rppt@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 59ba008504dc..fe024b2ac796 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -517,8 +517,10 @@ static void __init numa_clear_kernel_node_hotplug(void) * reserve specific pages for Sandy Bridge graphics. ] */ for_each_memblock(reserved, mb_region) { - if (mb_region->nid != MAX_NUMNODES) - node_set(mb_region->nid, reserved_nodemask); + int nid = memblock_get_region_node(mb_region); + + if (nid != MAX_NUMNODES) + node_set(nid, reserved_nodemask); } /* -- cgit v1.2.3 From 3f08a302f533f74ad2e909e7a61274aa7eebc0ab Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Jun 2020 15:57:02 -0700 Subject: mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option CONFIG_HAVE_MEMBLOCK_NODE_MAP is used to differentiate initialization of nodes and zones structures between the systems that have region to node mapping in memblock and those that don't. Currently all the NUMA architectures enable this option and for the non-NUMA systems we can presume that all the memory belongs to node 0 and therefore the compile time configuration option is not required. The remaining few architectures that use DISCONTIGMEM without NUMA are easily updated to use memblock_add_node() instead of memblock_add() and thus have proper correspondence of memblock regions to NUMA nodes. Still, free_area_init_node() must have a backward compatible version because its semantics with and without CONFIG_HAVE_MEMBLOCK_NODE_MAP is different. Once all the architectures will use the new semantics, the entire compatibility layer can be dropped. To avoid addition of extra run time memory to store node id for architectures that keep memblock but have only a single node, the node id field of the memblock_region is guarded by CONFIG_NEED_MULTIPLE_NODES and the corresponding accessors presume that in those cases it is always 0. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Tested-by: Hoan Tran [arm64] Acked-by: Catalin Marinas [arm64] Cc: Baoquan He Cc: Brian Cain Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guan Xuetao Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Nick Hu Cc: Paul Walmsley Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200412194859.12663-4-rppt@kernel.org Signed-off-by: Linus Torvalds --- .../features/vm/numa-memblock/arch-support.txt | 34 ------- arch/alpha/mm/numa.c | 4 +- arch/arm64/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/m68k/mm/motorola.c | 4 +- arch/microblaze/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/riscv/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/x86/Kconfig | 1 - include/linux/memblock.h | 8 +- include/linux/mm.h | 12 +-- include/linux/mmzone.h | 2 +- mm/Kconfig | 3 - mm/memblock.c | 11 +-- mm/memory_hotplug.c | 4 - mm/page_alloc.c | 101 ++++++++++++--------- 20 files changed, 74 insertions(+), 119 deletions(-) delete mode 100644 Documentation/features/vm/numa-memblock/arch-support.txt (limited to 'arch/x86') diff --git a/Documentation/features/vm/numa-memblock/arch-support.txt b/Documentation/features/vm/numa-memblock/arch-support.txt deleted file mode 100644 index 3004beb0fd71..000000000000 --- a/Documentation/features/vm/numa-memblock/arch-support.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# Feature name: numa-memblock -# Kconfig: HAVE_MEMBLOCK_NODE_MAP -# description: arch supports NUMA aware memblocks -# - ----------------------- - | arch |status| - ----------------------- - | alpha: | TODO | - | arc: | .. | - | arm: | .. | - | arm64: | ok | - | c6x: | .. | - | csky: | .. | - | h8300: | .. | - | hexagon: | .. | - | ia64: | ok | - | m68k: | .. | - | microblaze: | ok | - | mips: | ok | - | nds32: | TODO | - | nios2: | .. | - | openrisc: | .. | - | parisc: | .. | - | powerpc: | ok | - | riscv: | ok | - | s390: | ok | - | sh: | ok | - | sparc: | ok | - | um: | .. | - | unicore32: | .. | - | x86: | ok | - | xtensa: | .. | - ----------------------- diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index d0b73371e985..a24cd13e71cb 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -144,8 +144,8 @@ setup_memory_node(int nid, void *kernel_end) if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn)) panic("kernel loaded out of ram"); - memblock_add(PFN_PHYS(node_min_pfn), - (node_max_pfn - node_min_pfn) << PAGE_SHIFT); + memblock_add_node(PFN_PHYS(node_min_pfn), + (node_max_pfn - node_min_pfn) << PAGE_SHIFT, nid); /* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned. Note that we round this down, not up - node memory diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 552d36cacc05..1a9b480c6f1d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -162,7 +162,6 @@ config ARM64 select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_MEMBLOCK_NODE_MAP if NUMA select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index bab7cd878464..88b05b5256a9 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -31,7 +31,6 @@ config IA64 select HAVE_FUNCTION_TRACER select TTY select HAVE_ARCH_TRACEHOOK - select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING select DMA_NONCOHERENT_MMAP select ARCH_HAS_SYNC_DMA_FOR_CPU diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index fc16190ec2d6..84ab5963cabb 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -386,7 +386,7 @@ void __init paging_init(void) min_addr = m68k_memory[0].addr; max_addr = min_addr + m68k_memory[0].size; - memblock_add(m68k_memory[0].addr, m68k_memory[0].size); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); for (i = 1; i < m68k_num_memory;) { if (m68k_memory[i].addr < min_addr) { printk("Ignoring memory chunk at 0x%lx:0x%lx before the first chunk\n", @@ -397,7 +397,7 @@ void __init paging_init(void) (m68k_num_memory - i) * sizeof(struct m68k_mem_info)); continue; } - memblock_add(m68k_memory[i].addr, m68k_memory[i].size); + memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i); addr = m68k_memory[i].addr + m68k_memory[i].size; if (addr > max_addr) max_addr = addr; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 9606c244b5b8..d262ac0c8714 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -32,7 +32,6 @@ config MICROBLAZE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE select HAVE_PCI select IRQ_DOMAIN diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 690718b3701a..94a91b5b7759 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -72,7 +72,6 @@ config MIPS select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPROFILE diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b29d7cb38368..41ba42b107c0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -211,7 +211,6 @@ config PPC select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a31e1a41913a..5c07ca4d5cd6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -32,7 +32,6 @@ config RISCV select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS - select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS if MMU select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_PERF_EVENTS diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2167bce993ff..d6dc6933adc2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -162,7 +162,6 @@ config S390 select HAVE_LIVEPATCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 97656d20b9ea..0424b8f2f8d3 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -9,7 +9,6 @@ config SUPERH select CLKDEV_LOOKUP select DMA_DECLARE_COHERENT select HAVE_IDE if HAS_IOPORT_MAP - select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index da515fdad83d..795206b7b552 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -65,7 +65,6 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select MMU_GATHER_RCU_TABLE_FREE if SMP - select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e5d38cd11df0..c669328abf58 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -192,7 +192,6 @@ config X86 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MIXED_BREAKPOINTS_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_MOVE_PMD diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6bc37a731d27..45abfc54da37 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,7 +50,7 @@ struct memblock_region { phys_addr_t base; phys_addr_t size; enum memblock_flags flags; -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +#ifdef CONFIG_NEED_MULTIPLE_NODES int nid; #endif }; @@ -215,7 +215,6 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, @@ -234,7 +233,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, #define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, @@ -310,10 +308,10 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ nid, flags, p_start, p_end, p_nid) -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_set_node(phys_addr_t base, phys_addr_t size, struct memblock_type *type, int nid); +#ifdef CONFIG_NEED_MULTIPLE_NODES static inline void memblock_set_region_node(struct memblock_region *r, int nid) { r->nid = nid; @@ -332,7 +330,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r) { return 0; } -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +#endif /* CONFIG_NEED_MULTIPLE_NODES */ /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4288e6993dc8..5f15d8723167 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2401,9 +2401,8 @@ static inline unsigned long get_num_physpages(void) return phys_pages; } -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* - * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its + * Using memblock node mappings, an architecture may initialise its * zones, allocate the backing mem_map and account for memory holes in a more * architecture independent manner. This is a substitute for creating the * zone_sizes[] and zholes_size[] arrays and passing them to @@ -2424,9 +2423,6 @@ static inline unsigned long get_num_physpages(void) * registered physical page range. Similarly * sparse_memory_present_with_active_regions() calls memory_present() for * each range when SPARSEMEM is enabled. - * - * See mm/page_alloc.c for more information on each function exposed by - * CONFIG_HAVE_MEMBLOCK_NODE_MAP. */ extern void free_area_init_nodes(unsigned long *max_zone_pfn); unsigned long node_map_pfn_alignment(void); @@ -2441,13 +2437,9 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid); -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - -#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ - !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) +#ifndef CONFIG_NEED_MULTIPLE_NODES static inline int early_pfn_to_nid(unsigned long pfn) { - BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)); return 0; } #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c3a77eb85b42..0c575c3d7feb 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -876,7 +876,7 @@ extern int movable_zone; #ifdef CONFIG_HIGHMEM static inline int zone_movable_is_highmem(void) { -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +#ifdef CONFIG_NEED_MULTIPLE_NODES return movable_zone == ZONE_HIGHMEM; #else return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; diff --git a/mm/Kconfig b/mm/Kconfig index 5c0362bd8d56..3af64646f343 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -126,9 +126,6 @@ config SPARSEMEM_VMEMMAP pfn_to_page and page_to_pfn operations. This is the most efficient option when sufficient kernel resources are available. -config HAVE_MEMBLOCK_NODE_MAP - bool - config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/memblock.c b/mm/memblock.c index 43e2fd3006c1..743659d88fc4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -620,7 +620,7 @@ repeat: * area, insert that portion. */ if (rbase > base) { -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +#ifdef CONFIG_NEED_MULTIPLE_NODES WARN_ON(nid != memblock_get_region_node(rgn)); #endif WARN_ON(flags != rgn->flags); @@ -1197,7 +1197,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, *idx = ULLONG_MAX; } -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* * Common iterator interface used to define for_each_mem_pfn_range(). */ @@ -1247,6 +1246,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, struct memblock_type *type, int nid) { +#ifdef CONFIG_NEED_MULTIPLE_NODES int start_rgn, end_rgn; int i, ret; @@ -1258,9 +1258,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, memblock_set_region_node(&type->regions[i], nid); memblock_merge_regions(type); +#endif return 0; } -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /** * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() @@ -1799,7 +1800,6 @@ bool __init_memblock memblock_is_map_memory(phys_addr_t addr) return !memblock_is_nomap(&memblock.memory.regions[i]); } -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int __init_memblock memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn) { @@ -1814,7 +1814,6 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn, return memblock_get_region_node(&type->regions[mid]); } -#endif /** * memblock_is_region_memory - check if a region is a subset of memory @@ -1905,7 +1904,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) size = rgn->size; end = base + size - 1; flags = rgn->flags; -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +#ifdef CONFIG_NEED_MULTIPLE_NODES if (memblock_get_region_node(rgn) != MAX_NUMNODES) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc0aad0bc1f5..e67dc501576a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1372,11 +1372,7 @@ check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages, static int __init cmdline_parse_movable_node(char *p) { -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP movable_node_enabled = true; -#else - pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n"); -#endif return 0; } early_param("movable_node", cmdline_parse_movable_node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8741ae0828e1..430e35384b78 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -335,7 +335,6 @@ static unsigned long nr_kernel_pages __initdata; static unsigned long nr_all_pages __initdata; static unsigned long dma_reserve __initdata; -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long required_kernelcore __initdata; @@ -348,7 +347,6 @@ static bool mirrored_kernelcore __meminitdata; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ int movable_zone; EXPORT_SYMBOL(movable_zone); -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if MAX_NUMNODES > 1 unsigned int nr_node_ids __read_mostly = MAX_NUMNODES; @@ -1499,8 +1497,7 @@ void __free_pages_core(struct page *page, unsigned int order) __free_pages(page, order); } -#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \ - defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) +#ifdef CONFIG_NEED_MULTIPLE_NODES static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata; @@ -1542,7 +1539,7 @@ int __meminit early_pfn_to_nid(unsigned long pfn) return nid; } -#endif +#endif /* CONFIG_NEED_MULTIPLE_NODES */ #ifdef CONFIG_NODES_SPAN_OTHER_NODES /* Only safe to use early in boot when initialisation is single-threaded */ @@ -5936,7 +5933,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat) static bool __meminit overlap_memmap_init(unsigned long zone, unsigned long *pfn) { -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP static struct memblock_region *r; if (mirrored_kernelcore && zone == ZONE_MOVABLE) { @@ -5952,7 +5948,6 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) return true; } } -#endif return false; } @@ -6585,8 +6580,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid, return nr_absent; } -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline unsigned long __init zone_spanned_pages_in_node(int nid, +static inline unsigned long __init compat_zone_spanned_pages_in_node(int nid, unsigned long zone_type, unsigned long node_start_pfn, unsigned long node_end_pfn, @@ -6605,7 +6599,7 @@ static inline unsigned long __init zone_spanned_pages_in_node(int nid, return zones_size[zone_type]; } -static inline unsigned long __init zone_absent_pages_in_node(int nid, +static inline unsigned long __init compat_zone_absent_pages_in_node(int nid, unsigned long zone_type, unsigned long node_start_pfn, unsigned long node_end_pfn, @@ -6617,13 +6611,12 @@ static inline unsigned long __init zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - static void __init calculate_node_totalpages(struct pglist_data *pgdat, unsigned long node_start_pfn, unsigned long node_end_pfn, unsigned long *zones_size, - unsigned long *zholes_size) + unsigned long *zholes_size, + bool compat) { unsigned long realtotalpages = 0, totalpages = 0; enum zone_type i; @@ -6631,17 +6624,38 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat, for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; unsigned long zone_start_pfn, zone_end_pfn; + unsigned long spanned, absent; unsigned long size, real_size; - size = zone_spanned_pages_in_node(pgdat->node_id, i, - node_start_pfn, - node_end_pfn, - &zone_start_pfn, - &zone_end_pfn, - zones_size); - real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, - node_start_pfn, node_end_pfn, - zholes_size); + if (compat) { + spanned = compat_zone_spanned_pages_in_node( + pgdat->node_id, i, + node_start_pfn, + node_end_pfn, + &zone_start_pfn, + &zone_end_pfn, + zones_size); + absent = compat_zone_absent_pages_in_node( + pgdat->node_id, i, + node_start_pfn, + node_end_pfn, + zholes_size); + } else { + spanned = zone_spanned_pages_in_node(pgdat->node_id, i, + node_start_pfn, + node_end_pfn, + &zone_start_pfn, + &zone_end_pfn, + zones_size); + absent = zone_absent_pages_in_node(pgdat->node_id, i, + node_start_pfn, + node_end_pfn, + zholes_size); + } + + size = spanned; + real_size = size - absent; + if (size) zone->zone_start_pfn = zone_start_pfn; else @@ -6941,10 +6955,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) */ if (pgdat == NODE_DATA(0)) { mem_map = NODE_DATA(0)->node_mem_map; -#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM) if (page_to_pfn(mem_map) != pgdat->node_start_pfn) mem_map -= offset; -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ } #endif } @@ -6961,9 +6973,10 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat) static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {} #endif -void __init free_area_init_node(int nid, unsigned long *zones_size, - unsigned long node_start_pfn, - unsigned long *zholes_size) +static void __init __free_area_init_node(int nid, unsigned long *zones_size, + unsigned long node_start_pfn, + unsigned long *zholes_size, + bool compat) { pg_data_t *pgdat = NODE_DATA(nid); unsigned long start_pfn = 0; @@ -6975,16 +6988,16 @@ void __init free_area_init_node(int nid, unsigned long *zones_size, pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; pgdat->per_cpu_nodestats = NULL; -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP - get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, - (u64)start_pfn << PAGE_SHIFT, - end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); -#else - start_pfn = node_start_pfn; -#endif + if (!compat) { + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, + (u64)start_pfn << PAGE_SHIFT, + end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); + } else { + start_pfn = node_start_pfn; + } calculate_node_totalpages(pgdat, start_pfn, end_pfn, - zones_size, zholes_size); + zones_size, zholes_size, compat); alloc_node_mem_map(pgdat); pgdat_set_deferred_range(pgdat); @@ -6992,6 +7005,14 @@ void __init free_area_init_node(int nid, unsigned long *zones_size, free_area_init_core(pgdat); } +void __init free_area_init_node(int nid, unsigned long *zones_size, + unsigned long node_start_pfn, + unsigned long *zholes_size) +{ + __free_area_init_node(nid, zones_size, node_start_pfn, zholes_size, + true); +} + #if !defined(CONFIG_FLAT_NODE_MEM_MAP) /* * Initialize all valid struct pages in the range [spfn, epfn) and mark them @@ -7075,8 +7096,6 @@ static inline void __init init_unavailable_mem(void) } #endif /* !CONFIG_FLAT_NODE_MEM_MAP */ -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP - #if MAX_NUMNODES > 1 /* * Figure out the number of possible node ids. @@ -7505,8 +7524,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) init_unavailable_mem(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); - free_area_init_node(nid, NULL, - find_min_pfn_for_node(nid), NULL); + __free_area_init_node(nid, NULL, + find_min_pfn_for_node(nid), NULL, false); /* Any memory on that node */ if (pgdat->node_present_pages) @@ -7571,8 +7590,6 @@ static int __init cmdline_parse_movablecore(char *p) early_param("kernelcore", cmdline_parse_kernelcore); early_param("movablecore", cmdline_parse_movablecore); -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - void adjust_managed_page_count(struct page *page, long count) { atomic_long_add(count, &page_zone(page)->managed_pages); -- cgit v1.2.3 From 9691a071aa26a21fc8dac804a2b98d3c24f76f9a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Jun 2020 15:57:10 -0700 Subject: mm: use free_area_init() instead of free_area_init_nodes() free_area_init() has effectively became a wrapper for free_area_init_nodes() and there is no point of keeping it. Still free_area_init() name is shorter and more general as it does not imply necessity to initialize multiple nodes. Rename free_area_init_nodes() to free_area_init(), update the callers and drop old version of free_area_init(). Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Tested-by: Hoan Tran [arm64] Reviewed-by: Baoquan He Acked-by: Catalin Marinas Cc: Brian Cain Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guan Xuetao Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Nick Hu Cc: Paul Walmsley Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200412194859.12663-6-rppt@kernel.org Signed-off-by: Linus Torvalds --- arch/arm64/mm/init.c | 2 +- arch/ia64/mm/contig.c | 2 +- arch/ia64/mm/discontig.c | 2 +- arch/microblaze/mm/init.c | 2 +- arch/mips/loongson64/numa.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/riscv/mm/init.c | 2 +- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 2 +- arch/sparc/mm/init_64.c | 2 +- arch/x86/mm/init.c | 2 +- include/linux/mm.h | 7 +++---- mm/page_alloc.c | 10 ++-------- 15 files changed, 18 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d2df416b840e..5dae97f2f628 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -206,7 +206,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif max_zone_pfns[ZONE_NORMAL] = max; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } #else diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 5b00dc3898e1..8786fa5c7612 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -210,6 +210,6 @@ paging_init (void) printk("Virtual mem_map starts at 0x%p\n", mem_map); } #endif /* !CONFIG_VIRTUAL_MEM_MAP */ - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 4f33f6e7e206..dd8284bcbf16 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -627,7 +627,7 @@ void __init paging_init(void) max_zone_pfns[ZONE_DMA32] = max_dma; #endif max_zone_pfns[ZONE_NORMAL] = max_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 1ffbfa96b9b8..dcaa53d11339 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -112,7 +112,7 @@ static void __init paging_init(void) #endif /* We don't have holes in memory map */ - free_area_init_nodes(zones_size); + free_area_init(zones_size); } void __init setup_memory(void) diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 1ae072df4831..901f5be5ee76 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -247,7 +247,7 @@ void __init paging_init(void) zones_size[ZONE_DMA32] = MAX_DMA32_PFN; #endif zones_size[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(zones_size); + free_area_init(zones_size); } void __init mem_init(void) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 79684000de0e..19719e8b41a5 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -418,7 +418,7 @@ void __init paging_init(void) } #endif - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } #ifdef CONFIG_64BIT diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index a45691e6ab90..1213215ea965 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -419,7 +419,7 @@ void __init paging_init(void) pagetable_init(); zones_size[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(zones_size); + free_area_init(zones_size); } void __init mem_init(void) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 041ed7cfd341..0fcea21f26b4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -271,7 +271,7 @@ void __init paging_init(void) max_zone_pfns[ZONE_HIGHMEM] = max_pfn; #endif - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); mark_nonram_nosave(); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 736de6c8739f..6168b1985b77 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -39,7 +39,7 @@ static void __init zone_sizes_init(void) #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } static void setup_zero_page(void) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 87b2d024e75a..b11bcf4da531 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -122,7 +122,7 @@ void __init paging_init(void) memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } void mark_rodata_ro(void) diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 8d2a68aea1fc..628f461b8993 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -334,7 +334,7 @@ void __init paging_init(void) memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } unsigned int mem_init_done = 0; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 1cf0d666dea3..79d3c5e0802e 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2488,7 +2488,7 @@ void __init paging_init(void) max_zone_pfns[ZONE_NORMAL] = end_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } printk("Booting Linux...\n"); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a573a3e63f02..1decb645dac0 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -947,7 +947,7 @@ void __init zone_sizes_init(void) max_zone_pfns[ZONE_HIGHMEM] = max_pfn; #endif - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { diff --git a/include/linux/mm.h b/include/linux/mm.h index 788704977de0..ff2c19e14c1e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2329,7 +2329,6 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) } extern void __init pagecache_init(void); -extern void free_area_init(unsigned long * max_zone_pfn); extern void __init free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void free_initmem(void); @@ -2410,21 +2409,21 @@ static inline unsigned long get_num_physpages(void) * * An architecture is expected to register range of page frames backed by * physical memory with memblock_add[_node]() before calling - * free_area_init_nodes() passing in the PFN each zone ends at. At a basic + * free_area_init() passing in the PFN each zone ends at. At a basic * usage, an architecture is expected to do something like * * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, * max_highmem_pfn}; * for_each_valid_physical_page_range() * memblock_add_node(base, size, nid) - * free_area_init_nodes(max_zone_pfns); + * free_area_init(max_zone_pfns); * * free_bootmem_with_active_regions() calls free_bootmem_node() for each * registered physical page range. Similarly * sparse_memory_present_with_active_regions() calls memory_present() for * each range when SPARSEMEM is enabled. */ -extern void free_area_init_nodes(unsigned long *max_zone_pfn); +void free_area_init(unsigned long *max_zone_pfn); unsigned long node_map_pfn_alignment(void); unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, unsigned long end_pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cf420e947d9c..644a59d17318 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7440,7 +7440,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid) } /** - * free_area_init_nodes - Initialise all pg_data_t and zone data + * free_area_init - Initialise all pg_data_t and zone data * @max_zone_pfn: an array of max PFNs for each zone * * This will call free_area_init_node() for each active node in the system. @@ -7452,7 +7452,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid) * starts where the previous one ended. For example, ZONE_DMA32 starts * at arch_max_dma_pfn. */ -void __init free_area_init_nodes(unsigned long *max_zone_pfn) +void __init free_area_init(unsigned long *max_zone_pfn) { unsigned long start_pfn, end_pfn; int i, nid; @@ -7712,12 +7712,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) dma_reserve = new_dma_reserve; } -void __init free_area_init(unsigned long *max_zone_pfn) -{ - init_unavailable_mem(); - free_area_init_nodes(max_zone_pfn); -} - static int page_alloc_cpu_dead(unsigned int cpu) { -- cgit v1.2.3 From acd3f5c441e9635857f02a7c21e7dd590dcf672e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Jun 2020 15:57:59 -0700 Subject: mm: remove early_pfn_in_nid() and CONFIG_NODES_SPAN_OTHER_NODES The memmap_init() function was made to iterate over memblock regions and as the result the early_pfn_in_nid() function became obsolete. Since CONFIG_NODES_SPAN_OTHER_NODES is only used to pick a stub or a real implementation of early_pfn_in_nid(), it is also not needed anymore. Remove both early_pfn_in_nid() and the CONFIG_NODES_SPAN_OTHER_NODES. Co-developed-by: Hoan Tran Signed-off-by: Hoan Tran Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Tested-by: Hoan Tran [arm64] Cc: Baoquan He Cc: Brian Cain Cc: Catalin Marinas Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guan Xuetao Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Nick Hu Cc: Paul Walmsley Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200412194859.12663-17-rppt@kernel.org Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 9 --------- arch/sparc/Kconfig | 9 --------- arch/x86/Kconfig | 9 --------- mm/page_alloc.c | 20 -------------------- 4 files changed, 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 41ba42b107c0..a8eee7a64add 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -686,15 +686,6 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. See memmap_init_zone() -# for details. -config NODES_SPAN_OTHER_NODES - def_bool y - depends on NEED_MULTIPLE_NODES - config STDBINUTILS bool "Using standard binutils settings" depends on 44x diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 795206b7b552..0e4f3891b904 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -286,15 +286,6 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. See memmap_init_zone() -# for details. -config NODES_SPAN_OTHER_NODES - def_bool y - depends on NEED_MULTIPLE_NODES - config ARCH_SPARSEMEM_ENABLE def_bool y if SPARC64 select SPARSEMEM_VMEMMAP_ENABLE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c669328abf58..f3b910fe1d34 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1583,15 +1583,6 @@ config X86_64_ACPI_NUMA ---help--- Enable ACPI SRAT based node topology detection. -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. See memmap_init_zone() -# for details. -config NODES_SPAN_OTHER_NODES - def_bool y - depends on X86_64_ACPI_NUMA - config NUMA_EMU bool "NUMA emulation" depends on NUMA diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 40587d74cd1c..1f7eff7120d7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1541,26 +1541,6 @@ int __meminit early_pfn_to_nid(unsigned long pfn) } #endif /* CONFIG_NEED_MULTIPLE_NODES */ -#ifdef CONFIG_NODES_SPAN_OTHER_NODES -/* Only safe to use early in boot when initialisation is single-threaded */ -static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node) -{ - int nid; - - nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache); - if (nid >= 0 && nid != node) - return false; - return true; -} - -#else -static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node) -{ - return true; -} -#endif - - void __init memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { -- cgit v1.2.3 From bc9331a19d758706493cbebba67ca70382edddac Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 3 Jun 2020 15:58:09 -0700 Subject: mm: rename free_area_init_node() to free_area_init_memoryless_node() free_area_init_node() is only used by x86 to initialize a memory-less nodes. Make its name reflect this and drop all the function parameters except node ID as they are anyway zero. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Tested-by: Hoan Tran [arm64] Cc: Baoquan He Cc: Brian Cain Cc: Catalin Marinas Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guan Xuetao Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Nick Hu Cc: Paul Walmsley Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200412194859.12663-19-rppt@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 5 +---- include/linux/mm.h | 9 +++------ mm/page_alloc.c | 7 ++----- 3 files changed, 6 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fe024b2ac796..8ee952038c80 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -737,12 +737,9 @@ void __init x86_numa_init(void) static void __init init_memory_less_node(int nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0}; - unsigned long zholes_size[MAX_NR_ZONES] = {0}; - /* Allocate and initialize node data. Memory-less node is now online.*/ alloc_node_data(nid); - free_area_init_node(nid, zones_size, 0, zholes_size); + free_area_init_memoryless_node(nid); /* * All zonelists will be built later in start_kernel() after per cpu diff --git a/include/linux/mm.h b/include/linux/mm.h index 21cf171ae9de..0d998c84231c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2329,8 +2329,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) } extern void __init pagecache_init(void); -extern void __init free_area_init_node(int nid, unsigned long * zones_size, - unsigned long zone_start_pfn, unsigned long *zholes_size); +extern void __init free_area_init_memoryless_node(int nid); extern void free_initmem(void); /* @@ -2402,10 +2401,8 @@ static inline unsigned long get_num_physpages(void) /* * Using memblock node mappings, an architecture may initialise its - * zones, allocate the backing mem_map and account for memory holes in a more - * architecture independent manner. This is a substitute for creating the - * zone_sizes[] and zholes_size[] arrays and passing them to - * free_area_init_node() + * zones, allocate the backing mem_map and account for memory holes in an + * architecture independent manner. * * An architecture is expected to register range of page frames backed by * physical memory with memblock_add[_node]() before calling diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 36d93c73f2bb..cc96ecbe52f7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6974,12 +6974,9 @@ static void __init __free_area_init_node(int nid, unsigned long *zones_size, free_area_init_core(pgdat); } -void __init free_area_init_node(int nid, unsigned long *zones_size, - unsigned long node_start_pfn, - unsigned long *zholes_size) +void __init free_area_init_memoryless_node(int nid) { - __free_area_init_node(nid, zones_size, node_start_pfn, zholes_size, - true); + __free_area_init_node(nid, NULL, 0, NULL, false); } #if !defined(CONFIG_FLAT_NODE_MEM_MAP) -- cgit v1.2.3 From ecd096506922332fdb36ff1211e03601befe6e18 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 3 Jun 2020 15:59:55 -0700 Subject: mm: make deferred init's max threads arch-specific Using padata during deferred init has only been tested on x86, so for now limit it to this architecture. If another arch wants this, it can find the max thread limit that's best for it and override deferred_page_init_max_threads(). Signed-off-by: Daniel Jordan Signed-off-by: Andrew Morton Tested-by: Josh Triplett Cc: Alexander Duyck Cc: Alex Williamson Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Herbert Xu Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Kirill Tkhai Cc: Michal Hocko Cc: Pavel Machek Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Robert Elliott Cc: Shile Zhang Cc: Steffen Klassert Cc: Steven Sistare Cc: Tejun Heo Cc: Zi Yan Link: http://lkml.kernel.org/r/20200527173608.2885243-8-daniel.m.jordan@oracle.com Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 12 ++++++++++++ include/linux/memblock.h | 3 +++ mm/page_alloc.c | 13 ++++++++----- 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 96274a90c5ff..e08f1007f776 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1265,6 +1265,18 @@ void __init mem_init(void) mem_init_print_info(NULL); } +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT +int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask) +{ + /* + * More CPUs always led to greater speedups on tested systems, up to + * all the nodes' CPUs. Use all since the system is otherwise idle + * now. + */ + return max_t(int, cpumask_weight(node_cpumask), 1); +} +#endif + int kernel_set_to_readonly; void mark_rodata_ro(void) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 45abfc54da37..807ab9daf0cd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -273,6 +273,9 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, #define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ for (; i != U64_MAX; \ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) + +int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); + #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ /** diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 27ec5dc4db33..fb9dec1c1976 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1836,6 +1836,13 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, } } +/* An arch may override for more concurrency. */ +__weak int __init +deferred_page_init_max_threads(const struct cpumask *node_cpumask) +{ + return 1; +} + /* Initialise remaining memory on a node */ static int __init deferred_init_memmap(void *data) { @@ -1884,11 +1891,7 @@ static int __init deferred_init_memmap(void *data) first_init_pfn)) goto zone_empty; - /* - * More CPUs always led to greater speedups on tested systems, up to - * all the nodes' CPUs. Use all since the system is otherwise idle now. - */ - max_threads = max(cpumask_weight(cpumask), 1u); + max_threads = deferred_page_init_max_threads(cpumask); while (spfn < epfn) { unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION); -- cgit v1.2.3 From ae94da898133947c2d1f005da10838478e4548db Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 3 Jun 2020 16:00:34 -0700 Subject: hugetlbfs: add arch_hugetlb_valid_size Patch series "Clean up hugetlb boot command line processing", v4. Longpeng(Mike) reported a weird message from hugetlb command line processing and proposed a solution [1]. While the proposed patch does address the specific issue, there are other related issues in command line processing. As hugetlbfs evolved, updates to command line processing have been made to meet immediate needs and not necessarily in a coordinated manner. The result is that some processing is done in arch specific code, some is done in arch independent code and coordination is problematic. Semantics can vary between architectures. The patch series does the following: - Define arch specific arch_hugetlb_valid_size routine used to validate passed huge page sizes. - Move hugepagesz= command line parsing out of arch specific code and into an arch independent routine. - Clean up command line processing to follow desired semantics and document those semantics. [1] https://lore.kernel.org/linux-mm/20200305033014.1152-1-longpeng2@huawei.com This patch (of 3): The architecture independent routine hugetlb_default_setup sets up the default huge pages size. It has no way to verify if the passed value is valid, so it accepts it and attempts to validate at a later time. This requires undocumented cooperation between the arch specific and arch independent code. For architectures that support more than one huge page size, provide a routine arch_hugetlb_valid_size to validate a huge page size. hugetlb_default_setup can use this to validate passed values. arch_hugetlb_valid_size will also be used in a subsequent patch to move processing of the "hugepagesz=" in arch specific code to a common routine in arch independent code. Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Acked-by: Gerald Schaefer [s390] Acked-by: Will Deacon Cc: Catalin Marinas Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: David S. Miller Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Dave Hansen Cc: Jonathan Corbet Cc: Longpeng Cc: Christophe Leroy Cc: Randy Dunlap Cc: Mina Almasry Cc: Peter Xu Cc: Nitesh Narayan Lal Cc: Anders Roxell Cc: "Aneesh Kumar K.V" Cc: Qian Cai Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200428205614.246260-1-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-2-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200417185049.275845-1-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200417185049.275845-2-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/hugetlbpage.c | 17 +++++++++++++---- arch/powerpc/mm/hugetlbpage.c | 20 +++++++++++++------- arch/riscv/mm/hugetlbpage.c | 26 +++++++++++++++++--------- arch/s390/mm/hugetlbpage.c | 16 ++++++++++++---- arch/sparc/mm/init_64.c | 24 ++++++++++++++++-------- arch/x86/mm/hugetlbpage.c | 17 +++++++++++++---- include/linux/hugetlb.h | 1 + mm/hugetlb.c | 21 ++++++++++++++++++--- 8 files changed, 103 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0be3355e3499..2ac41cefe699 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -464,17 +464,26 @@ static int __init hugetlbpage_init(void) } arch_initcall(hugetlbpage_init); -static __init int setup_hugepagesz(char *opt) +bool __init arch_hugetlb_valid_size(unsigned long size) { - unsigned long ps = memparse(opt, &opt); - - switch (ps) { + switch (size) { #ifdef CONFIG_ARM64_4K_PAGES case PUD_SIZE: #endif case CONT_PMD_SIZE: case PMD_SIZE: case CONT_PTE_SIZE: + return true; + } + + return false; +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + + if (arch_hugetlb_valid_size(ps)) { add_huge_page_size(ps); return 1; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 33b3461d91e8..de54d2a37830 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -558,7 +558,7 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) return vma_kernel_pagesize(vma); } -static int __init add_huge_page_size(unsigned long long size) +bool __init arch_hugetlb_valid_size(unsigned long size) { int shift = __ffs(size); int mmu_psize; @@ -566,20 +566,26 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ if (size <= PAGE_SIZE || !is_power_of_2(size)) - return -EINVAL; + return false; mmu_psize = check_and_get_huge_psize(shift); if (mmu_psize < 0) - return -EINVAL; + return false; BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); - /* Return if huge page size has already been setup */ - if (size_to_hstate(size)) - return 0; + return true; +} - hugetlb_add_hstate(shift - PAGE_SHIFT); +static int __init add_huge_page_size(unsigned long long size) +{ + int shift = __ffs(size); + + if (!arch_hugetlb_valid_size((unsigned long)size)) + return -EINVAL; + if (!size_to_hstate(size)) + hugetlb_add_hstate(shift - PAGE_SHIFT); return 0; } diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index a6189ed36c5f..da1f516bc451 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -12,21 +12,29 @@ int pmd_huge(pmd_t pmd) return pmd_leaf(pmd); } +bool __init arch_hugetlb_valid_size(unsigned long size) +{ + if (size == HPAGE_SIZE) + return true; + else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) + return true; + else + return false; +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); - if (ps == HPAGE_SIZE) { - hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT); - } else if (IS_ENABLED(CONFIG_64BIT) && ps == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); - return 0; + if (arch_hugetlb_valid_size(ps)) { + hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT); + return 1; } - return 1; + hugetlb_bad_size(); + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } __setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 4632d4e26b66..2f2b6b5b3d29 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -254,16 +254,24 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); } +bool __init arch_hugetlb_valid_size(unsigned long size) +{ + if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + return true; + else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + return true; + else + return false; +} + static __init int setup_hugepagesz(char *opt) { unsigned long size; char *string = opt; size = memparse(opt, &opt); - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + if (arch_hugetlb_valid_size(size)) { + hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); } else { hugetlb_bad_size(); pr_err("hugepagesz= specifies an unsupported page size %s\n", diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 79d3c5e0802e..24475b73657a 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -360,16 +360,11 @@ static void __init pud_huge_patch(void) __asm__ __volatile__("flush %0" : : "r" (addr)); } -static int __init setup_hugepagesz(char *string) +bool __init arch_hugetlb_valid_size(unsigned long size) { - unsigned long long hugepage_size; - unsigned int hugepage_shift; + unsigned int hugepage_shift = ilog2(size); unsigned short hv_pgsz_idx; unsigned int hv_pgsz_mask; - int rc = 0; - - hugepage_size = memparse(string, &string); - hugepage_shift = ilog2(hugepage_size); switch (hugepage_shift) { case HPAGE_16GB_SHIFT: @@ -397,7 +392,20 @@ static int __init setup_hugepagesz(char *string) hv_pgsz_mask = 0; } - if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) { + if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) + return false; + + return true; +} + +static int __init setup_hugepagesz(char *string) +{ + unsigned long long hugepage_size; + int rc = 0; + + hugepage_size = memparse(string, &string); + + if (!arch_hugetlb_valid_size((unsigned long)hugepage_size)) { hugetlb_bad_size(); pr_err("hugepagesz=%llu not supported by MMU.\n", hugepage_size); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 5bfd5aef5378..1c4372bfe782 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -181,13 +181,22 @@ get_unmapped_area: #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_X86_64 +bool __init arch_hugetlb_valid_size(unsigned long size) +{ + if (size == PMD_SIZE) + return true; + else if (size == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) + return true; + else + return false; +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); - if (ps == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + + if (arch_hugetlb_valid_size(ps)) { + hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT); } else { hugetlb_bad_size(); printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 43a1cef8f0f1..2eb15f5ab01e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -521,6 +521,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h); void __init hugetlb_bad_size(void); void __init hugetlb_add_hstate(unsigned order); +bool __init arch_hugetlb_valid_size(unsigned long size); struct hstate *size_to_hstate(unsigned long size); #ifndef HUGE_MAX_HSTATE diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bcabbe02192b..63ca4241ea87 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3256,6 +3256,12 @@ static int __init hugetlb_init(void) } subsys_initcall(hugetlb_init); +/* Overwritten by architectures with more huge page sizes */ +bool __init __attribute((weak)) arch_hugetlb_valid_size(unsigned long size) +{ + return size == HPAGE_SIZE; +} + /* Should be called on processing a hugepagesz=... option */ void __init hugetlb_bad_size(void) { @@ -3331,12 +3337,21 @@ static int __init hugetlb_nrpages_setup(char *s) } __setup("hugepages=", hugetlb_nrpages_setup); -static int __init hugetlb_default_setup(char *s) +static int __init default_hugepagesz_setup(char *s) { - default_hstate_size = memparse(s, &s); + unsigned long size; + + size = (unsigned long)memparse(s, NULL); + + if (!arch_hugetlb_valid_size(size)) { + pr_err("HugeTLB: unsupported default_hugepagesz %s\n", s); + return 0; + } + + default_hstate_size = size; return 1; } -__setup("default_hugepagesz=", hugetlb_default_setup); +__setup("default_hugepagesz=", default_hugepagesz_setup); static unsigned int cpuset_mems_nr(unsigned int *array) { -- cgit v1.2.3 From 359f25443a8dada0fb709dd044a422017031790f Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 3 Jun 2020 16:00:38 -0700 Subject: hugetlbfs: move hugepagesz= parsing to arch independent code Now that architectures provide arch_hugetlb_valid_size(), parsing of "hugepagesz=" can be done in architecture independent code. Create a single routine to handle hugepagesz= parsing and remove all arch specific routines. We can also remove the interface hugetlb_bad_size() as this is no longer used outside arch independent code. This also provides consistent behavior of hugetlbfs command line options. The hugepagesz= option should only be specified once for a specific size, but some architectures allow multiple instances. This appears to be more of an oversight when code was added by some architectures to set up ALL huge pages sizes. Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Tested-by: Sandipan Das Reviewed-by: Peter Xu Acked-by: Mina Almasry Acked-by: Gerald Schaefer [s390] Acked-by: Will Deacon Cc: Albert Ou Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Longpeng Cc: Nitesh Narayan Lal Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Randy Dunlap Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Anders Roxell Cc: "Aneesh Kumar K.V" Cc: Qian Cai Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200417185049.275845-3-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-3-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/hugetlbpage.c | 15 --------------- arch/powerpc/mm/hugetlbpage.c | 15 --------------- arch/riscv/mm/hugetlbpage.c | 16 ---------------- arch/s390/mm/hugetlbpage.c | 18 ------------------ arch/sparc/mm/init_64.c | 22 ---------------------- arch/x86/mm/hugetlbpage.c | 16 ---------------- include/linux/hugetlb.h | 1 - mm/hugetlb.c | 23 +++++++++++++++++------ 8 files changed, 17 insertions(+), 109 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2ac41cefe699..d6cb9fe71b44 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -478,18 +478,3 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return false; } - -static __init int setup_hugepagesz(char *opt) -{ - unsigned long ps = memparse(opt, &opt); - - if (arch_hugetlb_valid_size(ps)) { - add_huge_page_size(ps); - return 1; - } - - hugetlb_bad_size(); - pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); - return 0; -} -__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index de54d2a37830..2c3fa0a7787b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -589,21 +589,6 @@ static int __init add_huge_page_size(unsigned long long size) return 0; } -static int __init hugepage_setup_sz(char *str) -{ - unsigned long long size; - - size = memparse(str, &str); - - if (add_huge_page_size(size) != 0) { - hugetlb_bad_size(); - pr_err("Invalid huge page size specified(%llu)\n", size); - } - - return 1; -} -__setup("hugepagesz=", hugepage_setup_sz); - static int __init hugetlbpage_init(void) { bool configured = false; diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index da1f516bc451..4e5d7e9f0eef 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -22,22 +22,6 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return false; } -static __init int setup_hugepagesz(char *opt) -{ - unsigned long ps = memparse(opt, &opt); - - if (arch_hugetlb_valid_size(ps)) { - hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT); - return 1; - } - - hugetlb_bad_size(); - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); - return 0; - -} -__setup("hugepagesz=", setup_hugepagesz); - #ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 2f2b6b5b3d29..82df06d720e8 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -264,24 +264,6 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return false; } -static __init int setup_hugepagesz(char *opt) -{ - unsigned long size; - char *string = opt; - - size = memparse(opt, &opt); - if (arch_hugetlb_valid_size(size)) { - hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - pr_err("hugepagesz= specifies an unsupported page size %s\n", - string); - return 0; - } - return 1; -} -__setup("hugepagesz=", setup_hugepagesz); - static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 24475b73657a..1b1f1ac1869e 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -397,28 +397,6 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return true; } - -static int __init setup_hugepagesz(char *string) -{ - unsigned long long hugepage_size; - int rc = 0; - - hugepage_size = memparse(string, &string); - - if (!arch_hugetlb_valid_size((unsigned long)hugepage_size)) { - hugetlb_bad_size(); - pr_err("hugepagesz=%llu not supported by MMU.\n", - hugepage_size); - goto out; - } - - add_huge_page_size(hugepage_size); - rc = 1; - -out: - return rc; -} -__setup("hugepagesz=", setup_hugepagesz); #endif /* CONFIG_HUGETLB_PAGE */ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 1c4372bfe782..937d640a89e3 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -191,22 +191,6 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return false; } -static __init int setup_hugepagesz(char *opt) -{ - unsigned long ps = memparse(opt, &opt); - - if (arch_hugetlb_valid_size(ps)) { - hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", - ps >> 20); - return 0; - } - return 1; -} -__setup("hugepagesz=", setup_hugepagesz); - #ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2eb15f5ab01e..0c13706054ef 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -519,7 +519,6 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, int __init __alloc_bootmem_huge_page(struct hstate *h); int __init alloc_bootmem_huge_page(struct hstate *h); -void __init hugetlb_bad_size(void); void __init hugetlb_add_hstate(unsigned order); bool __init arch_hugetlb_valid_size(unsigned long size); struct hstate *size_to_hstate(unsigned long size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 63ca4241ea87..6a8454bc2917 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3262,12 +3262,6 @@ bool __init __attribute((weak)) arch_hugetlb_valid_size(unsigned long size) return size == HPAGE_SIZE; } -/* Should be called on processing a hugepagesz=... option */ -void __init hugetlb_bad_size(void) -{ - parsed_valid_hugepagesz = false; -} - void __init hugetlb_add_hstate(unsigned int order) { struct hstate *h; @@ -3337,6 +3331,23 @@ static int __init hugetlb_nrpages_setup(char *s) } __setup("hugepages=", hugetlb_nrpages_setup); +static int __init hugepagesz_setup(char *s) +{ + unsigned long size; + + size = (unsigned long)memparse(s, NULL); + + if (!arch_hugetlb_valid_size(size)) { + parsed_valid_hugepagesz = false; + pr_err("HugeTLB: unsupported hugepagesz %s\n", s); + return 0; + } + + hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); + return 1; +} +__setup("hugepagesz=", hugepagesz_setup); + static int __init default_hugepagesz_setup(char *s) { unsigned long size; -- cgit v1.2.3 From 38237830882ba8d425a397066982d5e32b4ced21 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 3 Jun 2020 16:00:42 -0700 Subject: hugetlbfs: remove hugetlb_add_hstate() warning for existing hstate hugetlb_add_hstate() prints a warning if the hstate already exists. This was originally done as part of kernel command line parsing. If 'hugepagesz=' was specified more than once, the warning pr_warn("hugepagesz= specified twice, ignoring\n"); would be printed. Some architectures want to enable all huge page sizes. They would call hugetlb_add_hstate for all supported sizes. However, this was done after command line processing and as a result hstates could have already been created for some sizes. To make sure no warning were printed, there would often be code like: if (!size_to_hstate(size) hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT) The only time we want to print the warning is as the result of command line processing. So, remove the warning from hugetlb_add_hstate and add it to the single arch independent routine processing "hugepagesz=". After this, calls to size_to_hstate() in arch specific code can be removed and hugetlb_add_hstate can be called without worrying about warning messages. [mike.kravetz@oracle.com: fix hugetlb initialization] Link: http://lkml.kernel.org/r/4c36c6ce-3774-78fa-abc4-b7346bf24348@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-5-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Tested-by: Anders Roxell Acked-by: Mina Almasry Acked-by: Gerald Schaefer [s390] Acked-by: Will Deacon Cc: Albert Ou Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Longpeng Cc: Nitesh Narayan Lal Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Xu Cc: Randy Dunlap Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: "Aneesh Kumar K.V" Cc: Qian Cai Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200417185049.275845-4-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-4-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/hugetlbpage.c | 16 ++++------------ arch/powerpc/mm/hugetlbpage.c | 3 +-- arch/riscv/mm/hugetlbpage.c | 2 +- arch/sparc/mm/init_64.c | 19 ++++--------------- arch/x86/mm/hugetlbpage.c | 2 +- mm/hugetlb.c | 9 ++++++--- 6 files changed, 17 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index d6cb9fe71b44..07f154b8b84a 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -443,22 +443,14 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); } -static void __init add_huge_page_size(unsigned long size) -{ - if (size_to_hstate(size)) - return; - - hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); -} - static int __init hugetlbpage_init(void) { #ifdef CONFIG_ARM64_4K_PAGES - add_huge_page_size(PUD_SIZE); + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); #endif - add_huge_page_size(CONT_PMD_SIZE); - add_huge_page_size(PMD_SIZE); - add_huge_page_size(CONT_PTE_SIZE); + hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT); + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT); return 0; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 2c3fa0a7787b..4d5ed1093615 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -584,8 +584,7 @@ static int __init add_huge_page_size(unsigned long long size) if (!arch_hugetlb_valid_size((unsigned long)size)) return -EINVAL; - if (!size_to_hstate(size)) - hugetlb_add_hstate(shift - PAGE_SHIFT); + hugetlb_add_hstate(shift - PAGE_SHIFT); return 0; } diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 4e5d7e9f0eef..932dadfdca54 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -26,7 +26,7 @@ bool __init arch_hugetlb_valid_size(unsigned long size) static __init int gigantic_pages_init(void) { /* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */ - if (IS_ENABLED(CONFIG_64BIT) && !size_to_hstate(1UL << PUD_SHIFT)) + if (IS_ENABLED(CONFIG_64BIT)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 1b1f1ac1869e..5774529ceb43 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -325,23 +325,12 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde } #ifdef CONFIG_HUGETLB_PAGE -static void __init add_huge_page_size(unsigned long size) -{ - unsigned int order; - - if (size_to_hstate(size)) - return; - - order = ilog2(size) - PAGE_SHIFT; - hugetlb_add_hstate(order); -} - static int __init hugetlbpage_init(void) { - add_huge_page_size(1UL << HPAGE_64K_SHIFT); - add_huge_page_size(1UL << HPAGE_SHIFT); - add_huge_page_size(1UL << HPAGE_256MB_SHIFT); - add_huge_page_size(1UL << HPAGE_2GB_SHIFT); + hugetlb_add_hstate(HPAGE_64K_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(HPAGE_256MB_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(HPAGE_2GB_SHIFT - PAGE_SHIFT); return 0; } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 937d640a89e3..cf5781142716 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -195,7 +195,7 @@ bool __init arch_hugetlb_valid_size(unsigned long size) static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */ - if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT)) + if (boot_cpu_has(X86_FEATURE_GBPAGES)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6a8454bc2917..2ae0e506cfc7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3222,8 +3222,7 @@ static int __init hugetlb_init(void) } default_hstate_size = HPAGE_SIZE; - if (!size_to_hstate(default_hstate_size)) - hugetlb_add_hstate(HUGETLB_PAGE_ORDER); + hugetlb_add_hstate(HUGETLB_PAGE_ORDER); } default_hstate_idx = hstate_index(size_to_hstate(default_hstate_size)); if (default_hstate_max_huge_pages) { @@ -3268,7 +3267,6 @@ void __init hugetlb_add_hstate(unsigned int order) unsigned long i; if (size_to_hstate(PAGE_SIZE << order)) { - pr_warn("hugepagesz= specified twice, ignoring\n"); return; } BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE); @@ -3343,6 +3341,11 @@ static int __init hugepagesz_setup(char *s) return 0; } + if (size_to_hstate(size)) { + pr_warn("HugeTLB: hugepagesz %s specified twice, ignoring\n", s); + return 0; + } + hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); return 1; } -- cgit v1.2.3 From b0eae98c66fe4ccac3a5a79a1479c057f2c7cdd7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 3 Jun 2020 16:01:01 -0700 Subject: mm/hugetlb: define a generic fallback for is_hugepage_only_range() There are multiple similar definitions for is_hugepage_only_range() on various platforms. Lets just add it's generic fallback definition for platforms that do not override. This help reduce code duplication. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Mike Kravetz Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1588907271-11920-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/arm/include/asm/hugetlb.h | 6 ------ arch/arm64/include/asm/hugetlb.h | 6 ------ arch/ia64/include/asm/hugetlb.h | 1 + arch/mips/include/asm/hugetlb.h | 7 ------- arch/parisc/include/asm/hugetlb.h | 6 ------ arch/powerpc/include/asm/hugetlb.h | 1 + arch/riscv/include/asm/hugetlb.h | 6 ------ arch/s390/include/asm/hugetlb.h | 7 ------- arch/sh/include/asm/hugetlb.h | 6 ------ arch/sparc/include/asm/hugetlb.h | 6 ------ arch/x86/include/asm/hugetlb.h | 6 ------ include/linux/hugetlb.h | 9 +++++++++ 12 files changed, 11 insertions(+), 56 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 318dcf5921ab..9ecd516d1ff7 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -14,12 +14,6 @@ #include #include -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, unsigned long len) -{ - return 0; -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index b88878ddc88b..8f58e052697a 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -17,12 +17,6 @@ extern bool arch_hugetlb_migration_supported(struct hstate *h); #endif -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, unsigned long len) -{ - return 0; -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index 36cc0396b214..6ef50b9a4bdf 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -20,6 +20,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return (REGION_NUMBER(addr) == RGN_HPAGE || REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE); } +#define is_hugepage_only_range is_hugepage_only_range #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index 425bb6fc3bda..8b201e281f67 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -11,13 +11,6 @@ #include -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) -{ - return 0; -} - #define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE static inline int prepare_hugepage_range(struct file *file, unsigned long addr, diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index 7cb595dcb7d7..411d9d867baa 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -12,12 +12,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index bd6504c28c2f..b167c869d72d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -30,6 +30,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return slice_is_hugepage_only_range(mm, addr, len); return 0; } +#define is_hugepage_only_range is_hugepage_only_range #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 728a5db66597..866f6ae6467c 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -5,12 +5,6 @@ #include #include -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index de8f0bf5f238..7d27ea96ec2f 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -21,13 +21,6 @@ pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -static inline bool is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) -{ - return false; -} - /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index 6f025fe18146..536ad2cb8aa4 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -5,12 +5,6 @@ #include #include -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 3963f80d1cb3..a056fe1119f5 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -20,12 +20,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f65cfb48cfdd..cc98f79074d0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -7,12 +7,6 @@ #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0c13706054ef..bf97b17ab206 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -591,6 +591,15 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) #include +#ifndef is_hugepage_only_range +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} +#define is_hugepage_only_range is_hugepage_only_range +#endif + #ifndef arch_make_huge_pte static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writable) -- cgit v1.2.3 From 5be993432821750f382809df5e20bf4c129b24f7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 3 Jun 2020 16:01:05 -0700 Subject: mm/hugetlb: define a generic fallback for arch_clear_hugepage_flags() There are multiple similar definitions for arch_clear_hugepage_flags() on various platforms. Lets just add it's generic fallback definition for platforms that do not override. This help reduce code duplication. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Mike Kravetz Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1588907271-11920-4-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/arm/include/asm/hugetlb.h | 1 + arch/arm64/include/asm/hugetlb.h | 1 + arch/ia64/include/asm/hugetlb.h | 4 ---- arch/mips/include/asm/hugetlb.h | 4 ---- arch/parisc/include/asm/hugetlb.h | 4 ---- arch/powerpc/include/asm/hugetlb.h | 4 ---- arch/riscv/include/asm/hugetlb.h | 4 ---- arch/s390/include/asm/hugetlb.h | 1 + arch/sh/include/asm/hugetlb.h | 1 + arch/sparc/include/asm/hugetlb.h | 4 ---- arch/x86/include/asm/hugetlb.h | 4 ---- include/linux/hugetlb.h | 5 +++++ 12 files changed, 9 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 9ecd516d1ff7..d02d6ca88e92 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -18,5 +18,6 @@ static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags #endif /* _ASM_ARM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 8f58e052697a..94ba0c5bced2 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -21,6 +21,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writable); diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index 6ef50b9a4bdf..7e46ebde8c0c 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -28,10 +28,6 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, { } -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #include #endif /* _ASM_IA64_HUGETLB_H */ diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index 8b201e281f67..10e3be870df7 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -75,10 +75,6 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, return changed; } -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #include #endif /* __ASM_HUGETLB_H */ diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index 411d9d867baa..a69cf9efb0c1 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -42,10 +42,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #include #endif /* _ASM_PARISC64_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index b167c869d72d..e6dfa63da552 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -61,10 +61,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #include #else /* ! CONFIG_HUGETLB_PAGE */ diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 866f6ae6467c..a5c2ca1d1cd8 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -5,8 +5,4 @@ #include #include -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #endif /* _ASM_RISCV_HUGETLB_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 7d27ea96ec2f..9ddf4a43a590 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -39,6 +39,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_arch_1, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index 536ad2cb8aa4..ae4de7b89210 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -30,6 +30,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags #include diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index a056fe1119f5..53838a173f62 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -47,10 +47,6 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, return changed; } -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index cc98f79074d0..1721b1aadeb1 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -7,8 +7,4 @@ #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #endif /* _ASM_X86_HUGETLB_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index bf97b17ab206..2e66b71c1236 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -600,6 +600,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, #define is_hugepage_only_range is_hugepage_only_range #endif +#ifndef arch_clear_hugepage_flags +static inline void arch_clear_hugepage_flags(struct page *page) { } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#endif + #ifndef arch_make_huge_pte static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writable) -- cgit v1.2.3 From 86ec2da037b85436b63afe3df43ed48fa0e52b0e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 3 Jun 2020 16:03:45 -0700 Subject: mm/thp: rename pmd_mknotpresent() as pmd_mkinvalid() pmd_present() is expected to test positive after pmdp_mknotpresent() as the PMD entry still points to a valid huge page in memory. pmdp_mknotpresent() implies that given PMD entry is just invalidated from MMU perspective while still holding on to pmd_page() referred valid huge page thus also clearing pmd_present() test. This creates the following situation which is counter intuitive. [pmd_present(pmd_mknotpresent(pmd)) = true] This renames pmd_mknotpresent() as pmd_mkinvalid() reflecting the helper's functionality more accurately while changing the above mentioned situation as follows. This does not create any functional change. [pmd_present(pmd_mkinvalid(pmd)) = true] This is not applicable for platforms that define own pmdp_invalidate() via __HAVE_ARCH_PMDP_INVALIDATE. Suggestion for renaming came during a previous discussion here. https://patchwork.kernel.org/patch/11019637/ [anshuman.khandual@arm.com: change pmd_mknotvalid() to pmd_mkinvalid() per Will] Link: http://lkml.kernel.org/r/1587520326-10099-3-git-send-email-anshuman.khandual@arm.com Suggested-by: Catalin Marinas Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Will Deacon Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Paul Mackerras Link: http://lkml.kernel.org/r/1584680057-13753-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/arc/include/asm/hugepage.h | 2 +- arch/arm/include/asm/pgtable-3level.h | 2 +- arch/arm64/include/asm/pgtable.h | 2 +- arch/mips/include/asm/pgtable.h | 2 +- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/mm/kmmio.c | 2 +- mm/pgtable-generic.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 30ac40fed2c5..4eef17c5c1da 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -26,7 +26,7 @@ static inline pmd_t pte_pmd(pte_t pte) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_mkinvalid(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_write(pmd) pte_write(pmd_pte(pmd)) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 36805f94939e..1933aed9f68d 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -221,7 +221,7 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pmdp_establish generic_pmdp_establish /* represent a notpresent pmd by faulting entry, this is used by pmdp_invalidate */ -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return __pmd(pmd_val(pmd) & ~L_PMD_SECT_VALID); } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dae0466d19d6..9ce000f22d9e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -366,7 +366,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) +#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index f1801e7a4b15..bfbab6652e20 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -631,7 +631,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pmd; } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4d02e64af1b3..f51d8997ed00 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -624,7 +624,7 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) return __pud(pfn | check_pgprot(pgprot)); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9994353fb75d..22bae5828c3d 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -130,7 +130,7 @@ static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) pmdval_t v = pmd_val(*pmd); if (clear) { *old = v; - new_pmd = pmd_mknotpresent(*pmd); + new_pmd = pmd_mkinvalid(*pmd); } else { /* Presume this has been called with clear==true previously */ new_pmd = __pmd(*old); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 3d7c01e76efc..d18f0e1b6792 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -194,7 +194,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mknotpresent(*pmdp)); + pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return old; } -- cgit v1.2.3 From 7e01ccb43d62558dc65c53477a78da133ad8c377 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 3 Jun 2020 16:03:58 -0700 Subject: x86: mm: use ARCH_HAS_DEBUG_WX instead of arch defined Extract DEBUG_WX to mm/Kconfig.debug for shared use. Change to use ARCH_HAS_DEBUG_WX instead of DEBUG_WX defined by arch port. Signed-off-by: Zong Li Signed-off-by: Andrew Morton Cc: Borislav Petkov Cc: Catalin Marinas Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/430736828d149df3f5b462d291e845ec690e0141.1587455584.git.zong.li@sifive.com Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug | 27 --------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f3b910fe1d34..5534b4335802 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -81,6 +81,7 @@ config X86 select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_DEBUG_WX select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index f909d3ce36e6..fdf1431ac8c2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,33 +72,6 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_WX - bool "Warn on W+X mappings at boot" - select PTDUMP_CORE - ---help--- - Generate a warning if any W+X mappings are found at boot. - - This is useful for discovering cases where the kernel is leaving - W+X mappings after applying NX, as such mappings are a security risk. - - Look for a message in dmesg output like this: - - x86/mm: Checked W+X mappings: passed, no W+X pages found. - - or like this, if the check failed: - - x86/mm: Checked W+X mappings: FAILED, W+X pages found. - - Note that even if the check fails, your kernel is possibly - still fine, as W+X mappings are not a security hole in - themselves, what they do is that they make the exploitation - of other unfixed kernel bugs easier. - - There is no runtime or memory usage effect of this option - once the kernel has booted up - it's a one time check. - - If in doubt, say "Y". - config DEBUG_TLBFLUSH bool "Set upper limit of TLB entries to flush one-by-one" depends on DEBUG_KERNEL -- cgit v1.2.3