diff options
author | Helge Deller <deller@gmx.de> | 2019-04-09 21:52:35 +0200 |
---|---|---|
committer | Helge Deller <deller@gmx.de> | 2019-05-03 23:47:40 +0200 |
commit | dbdf0760990583649bfaca75fd98f76afd5f3905 (patch) | |
tree | 7caa3387834d951c5c55aad14b1dfd78cd262257 | |
parent | 6b1370ae392b79ac3e2d053205f4107d4a55c102 (diff) | |
download | linux-dbdf0760990583649bfaca75fd98f76afd5f3905.tar.bz2 |
parisc: Switch from DISCONTIGMEM to SPARSEMEM
The commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an
external fragmentation event occurs") breaks memory management on a
parisc c8000 workstation with this memory layout:
0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB
1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB
2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB
With the patch 1c30844d2dfe, the kernel will incorrectly reclaim the
first zone when it fills up, ignoring the fact that there are two
completely free zones. Basiscally, it limits cache size to 1GiB.
The parisc kernel is currently using the DISCONTIGMEM implementation,
but isn't NUMA. Avoid this issue or strange work-arounds by switching to
the more commonly used SPARSEMEM implementation.
Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
Signed-off-by: Helge Deller <deller@gmx.de>
-rw-r--r-- | arch/parisc/Kconfig | 12 | ||||
-rw-r--r-- | arch/parisc/include/asm/mmzone.h | 58 | ||||
-rw-r--r-- | arch/parisc/include/asm/page.h | 4 | ||||
-rw-r--r-- | arch/parisc/include/asm/sparsemem.h | 14 | ||||
-rw-r--r-- | arch/parisc/kernel/parisc_ksyms.c | 6 | ||||
-rw-r--r-- | arch/parisc/mm/init.c | 102 |
6 files changed, 68 insertions, 128 deletions
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index c8038165b81f..26c215570adf 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -36,6 +36,7 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE + select ARCH_DISCARD_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select VIRT_TO_BUS select MODULES_USE_ELF_RELA @@ -314,21 +315,16 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on 64BIT -config ARCH_DISCONTIGMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE def_bool y depends on 64BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_DISCONTIGMEM_DEFAULT +config ARCH_SPARSEMEM_DEFAULT def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config NODES_SHIFT - int - default "3" - depends on NEED_MULTIPLE_NODES + depends on ARCH_SPARSEMEM_ENABLE source "kernel/Kconfig.hz" diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h index fafa3893fd70..8d390406d862 100644 --- a/arch/parisc/include/asm/mmzone.h +++ b/arch/parisc/include/asm/mmzone.h @@ -2,62 +2,6 @@ #ifndef _PARISC_MMZONE_H #define _PARISC_MMZONE_H -#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ +#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */ -#ifdef CONFIG_DISCONTIGMEM - -extern int npmem_ranges; - -struct node_map_data { - pg_data_t pg_data; -}; - -extern struct node_map_data node_data[]; - -#define NODE_DATA(nid) (&node_data[nid].pg_data) - -/* We have these possible memory map layouts: - * Astro: 0-3.75, 67.75-68, 4-64 - * zx1: 0-1, 257-260, 4-256 - * Stretch (N-class): 0-2, 4-32, 34-xxx - */ - -/* Since each 1GB can only belong to one region (node), we can create - * an index table for pfn to nid lookup; each entry in pfnnid_map - * represents 1GB, and contains the node that the memory belongs to. */ - -#define PFNNID_SHIFT (30 - PAGE_SHIFT) -#define PFNNID_MAP_MAX 512 /* support 512GB */ -extern signed char pfnnid_map[PFNNID_MAP_MAX]; - -#ifndef CONFIG_64BIT -#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) -#else -/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ -#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) -#endif - -static inline int pfn_to_nid(unsigned long pfn) -{ - unsigned int i; - - if (unlikely(pfn_is_io(pfn))) - return 0; - - i = pfn >> PFNNID_SHIFT; - BUG_ON(i >= ARRAY_SIZE(pfnnid_map)); - - return pfnnid_map[i]; -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#endif #endif /* _PARISC_MMZONE_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index b77f49ce6220..93caf17ac5e2 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -147,9 +147,9 @@ extern int npmem_ranges; #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_DISCONTIGMEM */ +#endif #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */ diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h new file mode 100644 index 000000000000..b5c3a79045b4 --- /dev/null +++ b/arch/parisc/include/asm/sparsemem.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_PARISC_SPARSEMEM_H +#define ASM_PARISC_SPARSEMEM_H + +/* We have these possible memory map layouts: + * Astro: 0-3.75, 67.75-68, 4-64 + * zx1: 0-1, 257-260, 4-256 + * Stretch (N-class): 0-2, 4-32, 34-xxx + */ + +#define MAX_PHYSMEM_BITS 39 /* 512 GB */ +#define SECTION_SIZE_BITS 27 /* 128 MB */ + +#endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7baa2265d439..174213b1716e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -138,12 +138,6 @@ extern void $$dyncall(void); EXPORT_SYMBOL($$dyncall); #endif -#ifdef CONFIG_DISCONTIGMEM -#include <asm/mmzone.h> -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(pfnnid_map); -#endif - #ifdef CONFIG_FUNCTION_TRACER extern void _mcount(void); EXPORT_SYMBOL(_mcount); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index b2b52de2b82b..513f747b0d9d 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -32,6 +32,7 @@ #include <asm/mmzone.h> #include <asm/sections.h> #include <asm/msgbuf.h> +#include <asm/sparsemem.h> extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ @@ -48,11 +49,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); -#ifdef CONFIG_DISCONTIGMEM -struct node_map_data node_data[MAX_NUMNODES] __read_mostly; -signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; -#endif - static struct resource data_resource = { .name = "Kernel data", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -76,11 +72,11 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; * information retrieved in kernel/inventory.c. */ -physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; -int npmem_ranges __read_mostly; +physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata; +int npmem_ranges __initdata; #ifdef CONFIG_64BIT -#define MAX_MEM (~0UL) +#define MAX_MEM (1UL << MAX_PHYSMEM_BITS) #else /* !CONFIG_64BIT */ #define MAX_MEM (3584U*1024U*1024U) #endif /* !CONFIG_64BIT */ @@ -119,7 +115,7 @@ static void __init mem_limit_func(void) static void __init setup_bootmem(void) { unsigned long mem_max; -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; int npmem_holes; #endif @@ -137,23 +133,20 @@ static void __init setup_bootmem(void) int j; for (j = i; j > 0; j--) { - unsigned long tmp; + physmem_range_t tmp; if (pmem_ranges[j-1].start_pfn < pmem_ranges[j].start_pfn) { break; } - tmp = pmem_ranges[j-1].start_pfn; - pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; - pmem_ranges[j].start_pfn = tmp; - tmp = pmem_ranges[j-1].pages; - pmem_ranges[j-1].pages = pmem_ranges[j].pages; - pmem_ranges[j].pages = tmp; + tmp = pmem_ranges[j-1]; + pmem_ranges[j-1] = pmem_ranges[j]; + pmem_ranges[j] = tmp; } } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* * Throw out ranges that are too far apart (controlled by * MAX_GAP). @@ -165,7 +158,7 @@ static void __init setup_bootmem(void) pmem_ranges[i-1].pages) > MAX_GAP) { npmem_ranges = i; printk("Large gap in memory detected (%ld pages). " - "Consider turning on CONFIG_DISCONTIGMEM\n", + "Consider turning on CONFIG_SPARSEMEM\n", pmem_ranges[i].start_pfn - (pmem_ranges[i-1].start_pfn + pmem_ranges[i-1].pages)); @@ -230,9 +223,8 @@ static void __init setup_bootmem(void) printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* Merge the ranges, keeping track of the holes */ - { unsigned long end_pfn; unsigned long hole_pages; @@ -255,18 +247,6 @@ static void __init setup_bootmem(void) } #endif -#ifdef CONFIG_DISCONTIGMEM - for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { - memset(NODE_DATA(i), 0, sizeof(pg_data_t)); - } - memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); - - for (i = 0; i < npmem_ranges; i++) { - node_set_state(i, N_NORMAL_MEMORY); - node_set_online(i); - } -#endif - /* * Initialize and free the full range of memory in each range. */ @@ -314,7 +294,7 @@ static void __init setup_bootmem(void) memblock_reserve(__pa(KERNEL_BINARY_TEXT_START), (unsigned long)(_end - KERNEL_BINARY_TEXT_START)); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* reserve the holes */ @@ -360,6 +340,9 @@ static void __init setup_bootmem(void) /* Initialize Page Deallocation Table (PDT) and check for bad memory. */ pdc_pdt_init(); + + memblock_allow_resize(); + memblock_dump_all(); } static int __init parisc_text_address(unsigned long vaddr) @@ -713,37 +696,46 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } -void __init paging_init(void) +static void __init parisc_bootmem_free(void) { + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; + unsigned long holes_size[MAX_NR_ZONES] = { 0, }; + unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0; int i; + for (i = 0; i < npmem_ranges; i++) { + unsigned long start = pmem_ranges[i].start_pfn; + unsigned long size = pmem_ranges[i].pages; + unsigned long end = start + size; + + if (mem_start_pfn > start) + mem_start_pfn = start; + if (mem_end_pfn < end) + mem_end_pfn = end; + mem_size_pfn += size; + } + + zones_size[0] = mem_end_pfn - mem_start_pfn; + holes_size[0] = zones_size[0] - mem_size_pfn; + + free_area_init_node(0, zones_size, mem_start_pfn, holes_size); +} + +void __init paging_init(void) +{ setup_bootmem(); pagetable_init(); gateway_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); - for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - - zones_size[ZONE_NORMAL] = pmem_ranges[i].pages; - -#ifdef CONFIG_DISCONTIGMEM - /* Need to initialize the pfnnid_map before we can initialize - the zone */ - { - int j; - for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); - j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); - j++) { - pfnnid_map[j] = i; - } - } -#endif - - free_area_init_node(i, zones_size, - pmem_ranges[i].start_pfn, NULL); - } + /* + * Mark all memblocks as present for sparsemem using + * memory_present() and then initialize sparsemem. + */ + memblocks_present(); + sparse_init(); + parisc_bootmem_free(); } #ifdef CONFIG_PA20 |