From 55ce6e23ebd159bc3d8f0a20e27503e09b5d8138 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:52 +0100 Subject: mm: don't export __add_pages This function isn't used by any modules, and is only to be called from core MM code. This includes the calls for the add_pages wrapper that might be inlined. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- mm/memory_hotplug.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c52aa05b106c..5c6f96e6b334 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -334,7 +334,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, out: return err; } -EXPORT_SYMBOL_GPL(__add_pages); #ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ -- cgit v1.2.3 From 24e6d5a59ac7d31adc0322de2d0117dfa370936f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:53 +0100 Subject: mm: pass the vmem_altmap to arch_add_memory and __add_pages We can just pass this on instead of having to do a radix tree lookup without proper locking 2 levels into the callchain. 
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 5 +++-- arch/powerpc/mm/mem.c | 5 +++-- arch/s390/mm/init.c | 5 +++-- arch/sh/mm/init.c | 5 +++-- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 11 ++++++----- include/linux/memory_hotplug.h | 17 ++++++++++------- kernel/memremap.c | 3 ++- mm/hmm.c | 5 +++-- mm/memory_hotplug.c | 7 +++---- 10 files changed, 39 insertions(+), 29 deletions(-) (limited to 'mm') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7af4e05bb61e..2e2e4f532204 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -647,13 +647,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4362b86ef84c..e670cfc2766e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -144,7 +145,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git 
a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 671535e64aba..e12c5af50cd7 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -232,7 +233,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index afc54d593a26..552afbf55bad 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -485,14 +485,15 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. 
*/ - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 135c9a7898c7..8a3091511a71 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -829,12 +829,13 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8acdc35c2dfa..e80bb4189254 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -787,14 +787,15 @@ int add_pages(int nid, unsigned long start_pfn, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, 
want_memblock); + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..db276afbefcc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,6 +13,7 @@ struct pglist_data; struct mem_section; struct memory_block; struct resource; +struct vmem_altmap; #ifdef CONFIG_MEMORY_HOTPLUG /* @@ -131,18 +132,19 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ -extern int __add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #else /* ARCH_HAS_ADD_PAGES */ -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA @@ -318,7 +320,8 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); -extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); +extern int arch_add_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap, bool 
want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); diff --git a/kernel/memremap.c b/kernel/memremap.c index 403ab9cdb949..8488cdeead16 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -382,6 +382,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (altmap) { memcpy(&page_map->altmap, altmap, sizeof(*altmap)); pgmap->altmap = &page_map->altmap; + altmap = pgmap->altmap; } pgmap->ref = ref; pgmap->res = &page_map->res; @@ -427,7 +428,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, goto err_pfn_remap; mem_hotplug_begin(); - error = arch_add_memory(nid, align_start, align_size, false); + error = arch_add_memory(nid, align_start, align_size, altmap, false); if (!error) move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, diff --git a/mm/hmm.c b/mm/hmm.c index ea19742a5d60..231aaacd1997 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -931,10 +931,11 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) * want the linear mapping and thus use arch_add_memory(). */ if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) - ret = arch_add_memory(nid, align_start, align_size, false); + ret = arch_add_memory(nid, align_start, align_size, NULL, + false); else ret = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, false); + align_size >> PAGE_SHIFT, NULL, false); if (ret) { mem_hotplug_done(); goto error_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5c6f96e6b334..fc0485dcece1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -292,18 +292,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * add the new pages. 
*/ int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long i; int err = 0; int start_sec, end_sec; - struct vmem_altmap *altmap; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn)); if (altmap) { /* * Validate altmap is within bounds of the total request @@ -1148,7 +1147,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) } /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, true); + ret = arch_add_memory(nid, start, size, NULL, true); if (ret < 0) goto error; -- cgit v1.2.3 From 7b73d978a5d0d2a3637bdd57191cb6ffbad3feca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:54 +0100 Subject: mm: pass the vmem_altmap to vmemmap_populate We can just pass this on instead of having to do a radix tree lookup without proper locking a few levels into the callchain. 
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm64/mm/mmu.c | 6 ++++-- arch/ia64/mm/discontig.c | 3 ++- arch/powerpc/mm/init_64.c | 7 ++----- arch/s390/mm/vmem.c | 3 ++- arch/sparc/mm/init_64.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- include/linux/memory_hotplug.h | 3 ++- include/linux/mm.h | 6 ++++-- mm/memory_hotplug.c | 7 ++++--- mm/sparse-vmemmap.c | 7 ++++--- mm/sparse.c | 20 ++++++++++++-------- 11 files changed, 39 insertions(+), 29 deletions(-) (limited to 'mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 267d2b79d52d..ec8952ff13be 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -654,12 +654,14 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 9b2d994cddf6..1555aecaaf85 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -754,7 +754,8 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a07722531b32..779b74a96b8f 100644 --- 
a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; @@ -193,16 +194,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - /* altmap lookups only work at section boundaries */ - altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); - p = __vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) return -ENOMEM; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3316d463fc29..c44ef0e7c466 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. 
*/ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long pgt_prot, sgt_prot; unsigned long address = start; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 55ba62957e64..42d27a1a042a 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, - int node) + int node, struct vmem_altmap *altmap) { unsigned long pte_base; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e80bb4189254..594902ef56ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1411,9 +1411,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, return 0; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (boot_cpu_has(X86_FEATURE_PSE)) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index db276afbefcc..cbdd6d52e877 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -327,7 +327,8 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); -extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn); +extern int sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long 
map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, diff --git a/include/linux/mm.h b/include/linux/mm.h index ea818ff739cd..2f3a7ebecbe2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2538,7 +2538,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, unsigned long map_count, int nodeid); -struct page *sparse_mem_map_populate(unsigned long pnum, int nid); +struct page *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); @@ -2556,7 +2557,8 @@ static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); -int vmemmap_populate(unsigned long start, unsigned long end, int node); +int vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc0485dcece1..b36f1822c432 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -250,7 +250,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, - bool want_memblock) + struct vmem_altmap *altmap, bool want_memblock) { int ret; int i; @@ -258,7 +258,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, if (pfn_valid(phys_start_pfn)) return -EEXIST; - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn); + ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); if (ret < 0) 
return ret; @@ -317,7 +317,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, } for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), want_memblock); + err = __add_section(nid, section_nr_to_pfn(i), altmap, + want_memblock); /* * EEXIST is finally dealt with by ioresource collision diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 17acf01791fa..376dcf05a39c 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -278,7 +278,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start, return 0; } -struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) +struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { unsigned long start; unsigned long end; @@ -288,7 +289,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) start = (unsigned long)map; end = (unsigned long)(map + PAGES_PER_SECTION); - if (vmemmap_populate(start, end, nid)) + if (vmemmap_populate(start, end, nid, altmap)) return NULL; return map; @@ -318,7 +319,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..5f4a0dac7836 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -417,7 +417,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data, } #ifndef CONFIG_SPARSEMEM_VMEMMAP -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) +struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { struct page *map; unsigned long size; @@ -472,7 +473,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = 
sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); @@ -500,7 +501,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); - map = sparse_mem_map_populate(pnum, nid); + map = sparse_mem_map_populate(pnum, nid, NULL); if (map) return map; @@ -678,10 +679,11 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { /* This will make the necessary allocations eventually. */ - return sparse_mem_map_populate(pnum, nid); + return sparse_mem_map_populate(pnum, nid, altmap); } static void __kfree_section_memmap(struct page *memmap) { @@ -721,7 +723,8 @@ got_map_ptr: return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { return __kmalloc_section_memmap(); } @@ -773,7 +776,8 @@ static void free_map_bootmem(struct page *memmap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. 
*/ -int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn) +int __meminit sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; @@ -789,7 +793,7 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st ret = sparse_index_init(section_nr, pgdat->node_id); if (ret < 0 && ret != -EEXIST) return ret; - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); + memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap); if (!memmap) return -ENOMEM; usemap = __kmalloc_section_usemap(); -- cgit v1.2.3 From da024512a1fa5c979257e442130ee1d468285057 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:55 +0100 Subject: mm: pass the vmem_altmap to arch_remove_memory and __remove_pages We can just pass this on instead of having to do a radix tree lookup without proper locking 2 levels into the callchain. 
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/mm/mem.c | 6 ++---- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 6 ++---- include/linux/memory_hotplug.h | 5 +++-- kernel/memremap.c | 2 +- mm/hmm.c | 4 ++-- mm/memory_hotplug.c | 8 ++------ 10 files changed, 19 insertions(+), 26 deletions(-) (limited to 'mm') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 2e2e4f532204..6a8ce9e1536e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -663,7 +663,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -671,7 +671,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (ret) pr_warn("%s: Problem encountered in __remove_pages() as" " ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e670cfc2766e..22aa528b78a2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -149,11 +149,10 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct vmem_altmap *altmap; struct page *page; int ret; @@ -162,11 +161,10 @@ int arch_remove_memory(u64 start, u64 size) * when querying the zone. 
*/ page = pfn_to_page(start_pfn); - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); - ret = __remove_pages(page_zone(page), start_pfn, nr_pages); + ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); if (ret) return ret; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e12c5af50cd7..3fa3e5323612 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -240,7 +240,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 552afbf55bad..ce0bbaa7e404 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; @@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (unlikely(ret)) pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a3091511a71..79cb066f40c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -839,14 +839,14 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> 
PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + return __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 594902ef56ef..3c046618cc7e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,21 +1132,19 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true); } -int __ref arch_remove_memory(u64 start, u64 size) +int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); - struct vmem_altmap *altmap; struct zone *zone; int ret; /* With altmap the first mapped page is offset from @start */ - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index cbdd6d52e877..e71927d0d46b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -126,9 +126,10 @@ static inline bool movable_node_is_enabled(void) #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); -extern int arch_remove_memory(u64 start, u64 size); +extern int arch_remove_memory(u64 start, u64 size, + struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ diff --git a/kernel/memremap.c b/kernel/memremap.c index 
8488cdeead16..380fca1c4a02 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -304,7 +304,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_size = ALIGN(resource_size(res), SECTION_SIZE); mem_hotplug_begin(); - arch_remove_memory(align_start, align_size); + arch_remove_memory(align_start, align_size, pgmap->altmap); mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); diff --git a/mm/hmm.c b/mm/hmm.c index 231aaacd1997..5d17ba89062f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -838,10 +838,10 @@ static void hmm_devmem_release(struct device *dev, void *data) mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) - __remove_pages(zone, start_pfn, npages); + __remove_pages(zone, start_pfn, npages, NULL); else arch_remove_memory(start_pfn << PAGE_SHIFT, - npages << PAGE_SHIFT); + npages << PAGE_SHIFT, NULL); mem_hotplug_done(); hmm_devmem_radix_release(resource); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b36f1822c432..eae6bf47caf7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -569,7 +569,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, * calling offline_pages(). 
*/ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long i; unsigned long map_offset = 0; @@ -577,10 +577,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, /* In the ZONE_DEVICE case device driver owns the memory region */ if (is_dev_zone(zone)) { - struct page *page = pfn_to_page(phys_start_pfn); - struct vmem_altmap *altmap; - - altmap = to_vmem_altmap((unsigned long) page); if (altmap) map_offset = vmem_altmap_offset(altmap); } else { @@ -1890,7 +1886,7 @@ void __ref remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); - arch_remove_memory(start, size); + arch_remove_memory(start, size, NULL); try_offline_node(nid); -- cgit v1.2.3 From 24b6d4164348370c6b6a58b4248babd85ff9e982 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:56 +0100 Subject: mm: pass the vmem_altmap to vmemmap_free We can just pass this on instead of having to do a radix tree lookup without proper locking a few levels into the callchain. 
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm64/mm/mmu.c | 3 +- arch/ia64/mm/discontig.c | 3 +- arch/powerpc/mm/init_64.c | 5 ++-- arch/s390/mm/vmem.c | 3 +- arch/sparc/mm/init_64.c | 3 +- arch/x86/mm/init_64.c | 67 ++++++++++++++++++++++++------------------ include/linux/memory_hotplug.h | 2 +- include/linux/mm.h | 3 +- mm/memory_hotplug.c | 7 +++-- mm/sparse.c | 23 ++++++++------- 10 files changed, 68 insertions(+), 51 deletions(-) (limited to 'mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ec8952ff13be..0b1f13e0b4b3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -696,7 +696,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 1555aecaaf85..5ea0d8d0968b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -760,7 +760,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return vmemmap_populate_basepages(start, end, node); } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 779b74a96b8f..db7d4e092157 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -254,7 +254,8 @@ static unsigned long vmemmap_list_free(unsigned long start) return vmem_back->phys; } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = 
get_order(page_size); @@ -265,7 +266,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) for (; start < end; start += page_size) { unsigned long nr_pages, addr; - struct vmem_altmap *altmap; struct page *section_base; struct page *page; @@ -285,7 +285,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; - altmap = to_vmem_altmap((unsigned long) section_base); if (altmap) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c44ef0e7c466..db55561c5981 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -297,7 +297,8 @@ out: return ret; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 42d27a1a042a..995f9490334d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3c046618cc7e..0cab4b5b59ba 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -800,11 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order) +static void __meminit free_pagetable(struct page *page, int order, + struct vmem_altmap *altmap) { unsigned long magic; unsigned int nr_pages = 1 << order; - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); if (altmap) { vmem_altmap_free(altmap, nr_pages); @@ -826,7 
+826,8 @@ static void __meminit free_pagetable(struct page *page, int order) free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, + struct vmem_altmap *altmap) { pte_t *pte; int i; @@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0); + free_pagetable(pmd_page(*pmd), 0, altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, + struct vmem_altmap *altmap) { pmd_t *pmd; int i; @@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0); + free_pagetable(pud_page(*pud), 0, altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, + struct vmem_altmap *altmap) { pud_t *pud; int i; @@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0); + free_pagetable(p4d_page(*p4d), 0, altmap); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. 
*/ if (!direct) - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, - bool direct) + bool direct, struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte_base; @@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PMD_SIZE)) { if (!direct) free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, direct); - free_pte_table(pte_base, pmd); + remove_pte_table(pte_base, addr, next, altmap, direct); + free_pte_table(pte_base, pmd, altmap); } /* Call free_pmd_table() in remove_pud_table(). 
*/ @@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pmd_t *pmd_base; @@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, } pmd_base = pmd_offset(pud, 0); - remove_pmd_table(pmd_base, addr, next, direct); - free_pmd_table(pmd_base, pud); + remove_pmd_table(pmd_base, addr, next, direct, altmap); + free_pmd_table(pmd_base, pud, altmap); } if (direct) @@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, static void __meminit remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pud_t *pud_base; @@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, BUILD_BUG_ON(p4d_large(*p4d)); pud_base = pud_offset(p4d, 0); - remove_pud_table(pud_base, addr, next, direct); + remove_pud_table(pud_base, addr, next, altmap, direct); /* * For 4-level page tables we do not want to free PUDs, but in the * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. 
*/ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d); + free_pud_table(pud_base, p4d, altmap); } if (direct) @@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, /* start and end are both virtual address. */ static void __meminit -remove_pagetable(unsigned long start, unsigned long end, bool direct) +remove_pagetable(unsigned long start, unsigned long end, bool direct, + struct vmem_altmap *altmap) { unsigned long next; unsigned long addr; @@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) continue; p4d = p4d_offset(pgd, 0); - remove_p4d_table(p4d, addr, next, direct); + remove_p4d_table(p4d, addr, next, altmap, direct); } flush_tlb_all(); } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { - remove_pagetable(start, end, false); + remove_pagetable(start, end, false, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -1129,7 +1138,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) start = (unsigned long)__va(start); end = (unsigned long)__va(end); - remove_pagetable(start, end, true); + remove_pagetable(start, end, true, NULL); } int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e71927d0d46b..20dd98ad44a0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -331,7 +331,7 @@ extern void remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset); + unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); 
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f3a7ebecbe2..9d4cd4c1dc6d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2561,7 +2561,8 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG -void vmemmap_free(unsigned long start, unsigned long end); +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap); #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long nr_pages); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index eae6bf47caf7..a8dde9734120 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -536,7 +536,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) } static int __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { unsigned long start_pfn; int scn_nr; @@ -553,7 +553,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); - sparse_remove_one_section(zone, ms, map_offset); + sparse_remove_one_section(zone, ms, map_offset, altmap); return 0; } @@ -607,7 +607,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, for (i = 0; i < sections_to_remove; i++) { unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset); + ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, + altmap); map_offset = 0; if (ret) break; diff --git a/mm/sparse.c b/mm/sparse.c index 5f4a0dac7836..06130c13dc99 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -685,12 +685,13 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, /* 
This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid, altmap); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) @@ -698,7 +699,7 @@ static void free_map_bootmem(struct page *memmap) unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, NULL); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #else @@ -729,7 +730,8 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, return __kmalloc_section_memmap(); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { if (is_vmalloc_addr(memmap)) vfree(memmap); @@ -798,7 +800,7 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, return -ENOMEM; usemap = __kmalloc_section_usemap(); if (!usemap) { - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return -ENOMEM; } @@ -820,7 +822,7 @@ out: pgdat_resize_unlock(pgdat, &flags); if (ret <= 0) { kfree(usemap); - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); } return ret; } @@ -847,7 +849,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap) +static void free_section_usemap(struct page *memmap, unsigned long *usemap, + struct vmem_altmap *altmap) { struct page *usemap_page; @@ -861,7 +864,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) if (PageSlab(usemap_page) || 
PageCompound(usemap_page)) { kfree(usemap); if (memmap) - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return; } @@ -875,7 +878,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) } void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { struct page *memmap = NULL; unsigned long *usemap = NULL, flags; @@ -893,7 +896,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap); + free_section_usemap(memmap, usemap, altmap); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From a99583e780c751003ac9c0105eec9a3b23ec3bc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:57 +0100 Subject: mm: pass the vmem_altmap to memmap_init_zone Pass the vmem_altmap two levels down instead of needing a lookup. 
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 9 +++++---- include/linux/memory_hotplug.h | 2 +- include/linux/mm.h | 4 ++-- kernel/memremap.c | 2 +- mm/hmm.c | 2 +- mm/memory_hotplug.c | 9 +++++---- mm/page_alloc.c | 6 +++--- 7 files changed, 18 insertions(+), 16 deletions(-) (limited to 'mm') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 6a8ce9e1536e..18278b448530 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY); + MEMMAP_EARLY, NULL); return 0; } @@ -509,9 +509,10 @@ void __meminit memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { - if (!vmem_map) - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY); - else { + if (!vmem_map) { + memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, + NULL); + } else { struct page *start; struct memmap_init_callback_data args; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 20dd98ad44a0..aba5f86eb038 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -324,7 +324,7 @@ extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d4cd4c1dc6d..fd01135324b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h 
@@ -2069,8 +2069,8 @@ static inline void zero_resv_unavail(void) {} #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, - unsigned long, enum memmap_context); +extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, + enum memmap_context, struct vmem_altmap *); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/kernel/memremap.c b/kernel/memremap.c index 380fca1c4a02..64b12c806cc5 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -432,7 +432,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (!error) move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT); + align_size >> PAGE_SHIFT, altmap); mem_hotplug_done(); if (error) goto err_add_memory; diff --git a/mm/hmm.c b/mm/hmm.c index 5d17ba89062f..2f2e13c61040 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -942,7 +942,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) } move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT); + align_size >> PAGE_SHIFT, NULL); mem_hotplug_done(); for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a8dde9734120..12df8a5fadcc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -798,8 +798,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn; } -void __ref move_pfn_range_to_zone(struct zone *zone, - unsigned long start_pfn, unsigned long nr_pages) +void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { struct pglist_data *pgdat = zone->zone_pgdat; int nid = 
pgdat->node_id; @@ -824,7 +824,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone, * expects the zone spans the pfn range. All the pages in the range * are reserved so nobody should be touching them so we should be safe */ - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG); + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, + MEMMAP_HOTPLUG, altmap); set_zone_contiguous(zone); } @@ -896,7 +897,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid, struct zone *zone; zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); - move_pfn_range_to_zone(zone, start_pfn, nr_pages); + move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL); return zone; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e5e775e97f4..1748dd4a4b1b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5314,9 +5314,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat) * done. Non-atomic initialization, single-pass. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, enum memmap_context context) + unsigned long start_pfn, enum memmap_context context, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn)); unsigned long end_pfn = start_pfn + size; pg_data_t *pgdat = NODE_DATA(nid); unsigned long pfn; @@ -5417,7 +5417,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ - memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) + memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL) #endif static int zone_batchsize(struct zone *zone) -- cgit v1.2.3 From a8fc357b2875da8732c91eb085862a0648d82767 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:58 +0100 Subject: mm: split altmap memory map allocation from normal case No functional changes, just untangling the call 
chain and document why the altmap is passed around the hotplug code. Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- arch/powerpc/mm/init_64.c | 5 ++++- arch/x86/mm/init_64.c | 5 ++++- include/linux/mm.h | 9 ++------- mm/sparse-vmemmap.c | 15 +++------------ 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'mm') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index db7d4e092157..7a2251d99ed3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -200,7 +200,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, if (vmemmap_populated(start, page_size)) continue; - p = __vmemmap_alloc_block_buf(page_size, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(page_size, altmap); + else + p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0cab4b5b59ba..1ab42c852069 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1385,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(PMD_SIZE, altmap); + else + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (p) { pte_t entry; diff --git a/include/linux/mm.h b/include/linux/mm.h index fd01135324b6..09637c353de0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2547,13 +2547,8 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; -void *__vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap); -static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) -{ - return __vmemmap_alloc_block_buf(size, node, NULL); -} - +void 
*vmemmap_alloc_block_buf(unsigned long size, int node); +void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 376dcf05a39c..d012c9e2811b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) } /* need to make sure size is all the same during early stage */ -static void * __meminit alloc_block_buf(unsigned long size, int node) +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) { void *ptr; @@ -129,7 +129,7 @@ static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap, return pfn + nr_align; } -static void * __meminit altmap_alloc_block_buf(unsigned long size, +void * __meminit altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap) { unsigned long pfn, nr_pfns; @@ -153,15 +153,6 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size, return ptr; } -/* need to make sure size is all the same during early stage */ -void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap) -{ - if (altmap) - return altmap_alloc_block_buf(size, altmap); - return alloc_block_buf(size, node); -} - void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long start, unsigned long end) { @@ -178,7 +169,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = alloc_block_buf(PAGE_SIZE, node); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); if (!p) return NULL; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); -- cgit v1.2.3 From eb8045335c70ef8b272d2888a225b81344423139 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 
2017 08:53:59 +0100 Subject: mm: merge vmem_altmap_alloc into altmap_alloc_block_buf There is no clear separation between the two, so merge them. Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- mm/sparse-vmemmap.c | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) (limited to 'mm') diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index d012c9e2811b..bd0276d5f66b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap) } /** - * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation - * @altmap - reserved page pool for the allocation - * @nr_pfns - size (in pages) of the allocation + * altmap_alloc_block_buf - allocate pages from the device page map + * @altmap: device page map + * @size: size (in bytes) of the allocation * - * Allocations are aligned to the size of the request + * Allocations are aligned to the size of the request. 
*/ -static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap, - unsigned long nr_pfns) -{ - unsigned long pfn = vmem_altmap_next_pfn(altmap); - unsigned long nr_align; - - nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG); - nr_align = ALIGN(pfn, nr_align) - pfn; - - if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap)) - return ULONG_MAX; - altmap->alloc += nr_pfns; - altmap->align += nr_align; - return pfn + nr_align; -} - void * __meminit altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap) { - unsigned long pfn, nr_pfns; - void *ptr; + unsigned long pfn, nr_pfns, nr_align; if (size & ~PAGE_MASK) { pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n", @@ -141,16 +124,20 @@ void * __meminit altmap_alloc_block_buf(unsigned long size, return NULL; } + pfn = vmem_altmap_next_pfn(altmap); nr_pfns = size >> PAGE_SHIFT; - pfn = vmem_altmap_alloc(altmap, nr_pfns); - if (pfn < ULONG_MAX) - ptr = __va(__pfn_to_phys(pfn)); - else - ptr = NULL; + nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG); + nr_align = ALIGN(pfn, nr_align) - pfn; + if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap)) + return NULL; + + altmap->alloc += nr_pfns; + altmap->align += nr_align; + pfn += nr_align; + pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n", __func__, pfn, altmap->alloc, altmap->align, nr_pfns); - - return ptr; + return __va(__pfn_to_phys(pfn)); } void __meminit vmemmap_verify(pte_t *pte, int node, -- cgit v1.2.3 From 832d7aa051106c927cae05ced29d3fd31459ed21 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:54:01 +0100 Subject: mm: optimize dev_pagemap reference counting around get_dev_pagemap Change the calling convention so that get_dev_pagemap always consumes the previous reference instead of doing this using an explicit earlier call to put_dev_pagemap in the callers. The callers will still need to put the final reference after finishing the loop over the pages. 
Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- kernel/memremap.c | 17 +++++++++-------- mm/gup.c | 7 +++++-- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/kernel/memremap.c b/kernel/memremap.c index 3df6cd4ffb40..891c77487a6a 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -507,22 +507,23 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) * @pfn: page frame number to lookup page_map * @pgmap: optional known pgmap that already has a reference * - * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the - * same mapping. + * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap + * is non-NULL but does not cover @pfn the reference to it will be released. */ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { - const struct resource *res = pgmap ? pgmap->res : NULL; resource_size_t phys = PFN_PHYS(pfn); /* - * In the cached case we're already holding a live reference so - * we can simply do a blind increment + * In the cached case we're already holding a live reference. */ - if (res && phys >= res->start && phys <= res->end) { - percpu_ref_get(pgmap->ref); - return pgmap; + if (pgmap) { + const struct resource *res = pgmap ? 
pgmap->res : NULL; + + if (res && phys >= res->start && phys <= res->end) + return pgmap; + put_dev_pagemap(pgmap); } /* fall back to slow path lookup */ diff --git a/mm/gup.c b/mm/gup.c index e0d82b6706d7..3affe7544b0c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1410,7 +1410,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON_PAGE(compound_head(page) != head, page); - put_dev_pagemap(pgmap); SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -1420,6 +1419,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, ret = 1; pte_unmap: + if (pgmap) + put_dev_pagemap(pgmap); pte_unmap(ptem); return ret; } @@ -1459,10 +1460,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, SetPageReferenced(page); pages[*nr] = page; get_page(page); - put_dev_pagemap(pgmap); (*nr)++; pfn++; } while (addr += PAGE_SIZE, addr != end); + + if (pgmap) + put_dev_pagemap(pgmap); return 1; } -- cgit v1.2.3 From e7744aa25cffe26d3767c9ffcf4e130cca1dff00 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 29 Dec 2017 08:54:04 +0100 Subject: memremap: drop private struct page_map 'struct page_map' is a private structure of 'struct dev_pagemap' but the latter replicates all the same fields as the former so there isn't much value in it. Thus drop it in favour of a completely public struct. This is a clean up in preparation for a more generally useful 'devm_memremap_pages' interface.
Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/memremap.h | 5 ++-- kernel/memremap.c | 68 ++++++++++++++++++------------------------------ mm/hmm.c | 2 +- 3 files changed, 30 insertions(+), 45 deletions(-) (limited to 'mm') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 3fddcfe57bb0..1cb5f39d25c1 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -113,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data); struct dev_pagemap { dev_page_fault_t page_fault; dev_page_free_t page_free; - struct vmem_altmap *altmap; - const struct resource *res; + struct vmem_altmap altmap; + bool altmap_valid; + struct resource res; struct percpu_ref *ref; struct device *dev; void *data; diff --git a/kernel/memremap.c b/kernel/memremap.c index 12e78528fea4..9207c44cce20 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL); #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) -struct page_map { - struct resource res; - struct percpu_ref *ref; - struct dev_pagemap pgmap; - struct vmem_altmap altmap; -}; - static unsigned long order_at(struct resource *res, unsigned long pgoff) { unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff; @@ -260,22 +253,21 @@ static void pgmap_radix_release(struct resource *res) synchronize_rcu(); } -static unsigned long pfn_first(struct page_map *page_map) +static unsigned long pfn_first(struct dev_pagemap *pgmap) { - struct dev_pagemap *pgmap = &page_map->pgmap; - const struct resource *res = &page_map->res; - struct vmem_altmap *altmap = pgmap->altmap; + const struct resource *res = &pgmap->res; + struct vmem_altmap *altmap = &pgmap->altmap; unsigned long pfn; pfn = res->start >> PAGE_SHIFT; - if (altmap) + if (pgmap->altmap_valid) pfn += vmem_altmap_offset(altmap); return pfn; } -static unsigned long pfn_end(struct 
page_map *page_map) +static unsigned long pfn_end(struct dev_pagemap *pgmap) { - const struct resource *res = &page_map->res; + const struct resource *res = &pgmap->res; return (res->start + resource_size(res)) >> PAGE_SHIFT; } @@ -285,13 +277,12 @@ static unsigned long pfn_end(struct page_map *page_map) static void devm_memremap_pages_release(struct device *dev, void *data) { - struct page_map *page_map = data; - struct resource *res = &page_map->res; + struct dev_pagemap *pgmap = data; + struct resource *res = &pgmap->res; resource_size_t align_start, align_size; - struct dev_pagemap *pgmap = &page_map->pgmap; unsigned long pfn; - for_each_device_pfn(pfn, page_map) + for_each_device_pfn(pfn, pgmap) put_page(pfn_to_page(pfn)); if (percpu_ref_tryget_live(pgmap->ref)) { @@ -304,24 +295,22 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_size = ALIGN(resource_size(res), SECTION_SIZE); mem_hotplug_begin(); - arch_remove_memory(align_start, align_size, pgmap->altmap); + arch_remove_memory(align_start, align_size, pgmap->altmap_valid ? + &pgmap->altmap : NULL); mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); - dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, - "%s: failed to free all reserved pages\n", __func__); + dev_WARN_ONCE(dev, pgmap->altmap.alloc, + "%s: failed to free all reserved pages\n", __func__); } /* assumes rcu_read_lock() held at entry */ static struct dev_pagemap *find_dev_pagemap(resource_size_t phys) { - struct page_map *page_map; - WARN_ON_ONCE(!rcu_read_lock_held()); - page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); - return page_map ? 
&page_map->pgmap : NULL; + return radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); } /** @@ -349,7 +338,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; - struct page_map *page_map; int error, nid, is_ram, i = 0; align_start = res->start & ~(SECTION_SIZE - 1); @@ -370,22 +358,20 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (!ref) return ERR_PTR(-EINVAL); - page_map = devres_alloc_node(devm_memremap_pages_release, - sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); - if (!page_map) + pgmap = devres_alloc_node(devm_memremap_pages_release, + sizeof(*pgmap), GFP_KERNEL, dev_to_node(dev)); + if (!pgmap) return ERR_PTR(-ENOMEM); - pgmap = &page_map->pgmap; - memcpy(&page_map->res, res, sizeof(*res)); + memcpy(&pgmap->res, res, sizeof(*res)); pgmap->dev = dev; if (altmap) { - memcpy(&page_map->altmap, altmap, sizeof(*altmap)); - pgmap->altmap = &page_map->altmap; - altmap = pgmap->altmap; + memcpy(&pgmap->altmap, altmap, sizeof(*altmap)); + pgmap->altmap_valid = true; + altmap = &pgmap->altmap; } pgmap->ref = ref; - pgmap->res = &page_map->res; pgmap->type = MEMORY_DEVICE_HOST; pgmap->page_fault = NULL; pgmap->page_free = NULL; @@ -397,7 +383,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, foreach_order_pgoff(res, order, pgoff) { error = __radix_tree_insert(&pgmap_radix, - PHYS_PFN(res->start) + pgoff, order, page_map); + PHYS_PFN(res->start) + pgoff, order, pgmap); if (error) { dev_err(dev, "%s: failed: %d\n", __func__, error); break; @@ -426,7 +412,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_add_memory; - for_each_device_pfn(pfn, page_map) { + for_each_device_pfn(pfn, pgmap) { struct page *page = pfn_to_page(pfn); /* @@ -441,7 +427,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (!(++i % 1024)) cond_resched(); } - 
devres_add(dev, page_map); + devres_add(dev, pgmap); return __va(res->start); err_add_memory: @@ -449,7 +435,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, err_pfn_remap: err_radix: pgmap_radix_release(res); - devres_free(page_map); + devres_free(pgmap); return ERR_PTR(error); } EXPORT_SYMBOL(devm_memremap_pages); @@ -482,9 +468,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, * In the cached case we're already holding a live reference. */ if (pgmap) { - const struct resource *res = pgmap ? pgmap->res : NULL; - - if (res && phys >= res->start && phys <= res->end) + if (phys >= pgmap->res.start && phys <= pgmap->res.end) return pgmap; put_dev_pagemap(pgmap); } diff --git a/mm/hmm.c b/mm/hmm.c index 2f2e13c61040..320fdc87f064 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -882,7 +882,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) else devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.res = devmem->resource; + devmem->pagemap.res = *devmem->resource; devmem->pagemap.page_fault = hmm_devmem_fault; devmem->pagemap.page_free = hmm_devmem_free; devmem->pagemap.dev = devmem->device; -- cgit v1.2.3 From 785a3fab4adbf91b2189c928a59ae219c54ba95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 23 Oct 2017 07:20:00 -0700 Subject: mm, dax: introduce pfn_t_special() In support of removing the VM_MIXEDMAP indication from DAX VMAs, introduce pfn_t_special() for drivers to indicate that _PAGE_SPECIAL should be used for DAX ptes. This also helps identify drivers like dcssblk that only want to use DAX in a read-only fashion without get_user_pages() support. Ideally we could delete axonram and dcssblk DAX support, but if we need to keep it, we had better make it explicit that axonram and dcssblk only support a subset of DAX due to missing _PAGE_DEVMAP support.
Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Dan Williams --- arch/powerpc/sysdev/axonram.c | 2 +- drivers/s390/block/dcssblk.c | 3 ++- include/linux/pfn_t.h | 13 +++++++++++++ mm/memory.c | 16 +++++++++++++++- 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1b307c80b401..cdbb0e59b3d3 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -151,7 +151,7 @@ __axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_page resource_size_t offset = pgoff * PAGE_SIZE; *kaddr = (void *) bank->io_addr + offset; - *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); + *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV|PFN_SPECIAL); return (bank->size - offset) / PAGE_SIZE; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6aaefb780436..9cae08b36b80 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, dev_sz = dev_info->end - dev_info->start + 1; *kaddr = (void *) dev_info->start + offset; - *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); + *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), + PFN_DEV|PFN_SPECIAL); return (dev_sz - offset) / PAGE_SIZE; } diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 43b1d7648e82..a03c2642a87c 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -15,8 +15,10 @@ #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) +#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) #define PFN_FLAGS_TRACE \ + { PFN_SPECIAL, "SPECIAL" }, \ { PFN_SG_CHAIN, "SG_CHAIN" }, \ { PFN_SG_LAST, "SG_LAST" }, \ { PFN_DEV, "DEV" }, \ @@ 
-120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud); #endif #endif /* __HAVE_ARCH_PTE_DEVMAP */ +#ifdef __HAVE_ARCH_PTE_SPECIAL +static inline bool pfn_t_special(pfn_t pfn) +{ + return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; +} +#else +static inline bool pfn_t_special(pfn_t pfn) +{ + return false; +} +#endif /* __HAVE_ARCH_PTE_SPECIAL */ #endif /* _LINUX_PFN_T_H_ */ diff --git a/mm/memory.c b/mm/memory.c index ca5674cbaff2..46b6c33b7f04 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1897,12 +1897,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_pfn_prot); +static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) +{ + /* these checks mirror the abort conditions in vm_normal_page */ + if (vma->vm_flags & VM_MIXEDMAP) + return true; + if (pfn_t_devmap(pfn)) + return true; + if (pfn_t_special(pfn)) + return true; + if (is_zero_pfn(pfn_t_to_pfn(pfn))) + return true; + return false; +} + static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; - BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); + BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; -- cgit v1.2.3