author    | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-26 14:04:32 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-26 14:04:32 -0700
commit    | c3bf9bc243092c53946fd6d8ebd6dc2f4e572d48 (patch)
tree      | 4cabbf33e11e3a71b64394b24fe70453f41cefe8 /mm
parent    | e3505dd50caf54e6f81f897cb347441409974a15 (diff)
parent    | c2b91e2eec9678dbda274e906cc32ea8f711da3b (diff)
download  | linux-c3bf9bc243092c53946fd6d8ebd6dc2f4e572d48.tar.bz2
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86-bigbox-bootmem-v3
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86-bigbox-bootmem-v3:
x86_64/mm: check and print vmemmap allocation continuous
x86_64: fix setup_node_bootmem to support big mem excluding with memmap
x86_64: make reserve_bootmem_generic() use new reserve_bootmem()
mm: allow reserve_bootmem() cross nodes
mm: offset align in alloc_bootmem()
mm: fix alloc_bootmem_core to use fast searching for all nodes
mm: make mem_map allocation continuous
Diffstat (limited to 'mm')
-rw-r--r-- | mm/bootmem.c | 164
-rw-r--r-- | mm/sparse.c  |  37
2 files changed, 146 insertions, 55 deletions
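
A note before the diff itself: the reserve_bootmem() rework below replaces the old reserve-then-unwind logic with two passes, a read-only check pass over every node's bitmap that can fail with -EBUSY, followed by a commit pass that cannot fail, so a conflicting BOOTMEM_EXCLUSIVE request never leaves bits half set. Here is a minimal user-space sketch of that validate-then-commit pattern; `struct node`, `can_reserve()`, `do_reserve()` and the fixed two-node layout are simplified stand-ins for illustration, not the kernel's bootmem_data_t API.

```c
#include <stdio.h>
#include <errno.h>

#define PAGES_PER_NODE 64
#define NNODES 2
#define BOOTMEM_EXCLUSIVE 1

struct node {
    unsigned long start_pfn;           /* first page frame of this node */
    unsigned char map[PAGES_PER_NODE]; /* 1 = page already reserved */
};

static struct node nodes[NNODES] = {
    { .start_pfn = 0 },
    { .start_pfn = PAGES_PER_NODE },
};

/* Phase 1: read-only check; -EBUSY if any page in [pfn, pfn+n) is taken. */
static int can_reserve(struct node *nd, unsigned long pfn,
                       unsigned long n, int flags)
{
    unsigned long i;

    for (i = 0; i < n; i++) {
        unsigned long idx = pfn + i;

        if (idx < nd->start_pfn || idx >= nd->start_pfn + PAGES_PER_NODE)
            continue;               /* page lives on another node */
        if (nd->map[idx - nd->start_pfn] && (flags & BOOTMEM_EXCLUSIVE))
            return -EBUSY;
    }
    return 0;
}

/* Phase 2: actually mark the pages; by construction this cannot fail. */
static void do_reserve(struct node *nd, unsigned long pfn, unsigned long n)
{
    unsigned long i;

    for (i = 0; i < n; i++) {
        unsigned long idx = pfn + i;

        if (idx >= nd->start_pfn && idx < nd->start_pfn + PAGES_PER_NODE)
            nd->map[idx - nd->start_pfn] = 1;
    }
}

/* The range may span nodes, so check all of them before touching any. */
static int reserve(unsigned long pfn, unsigned long n, int flags)
{
    int i, ret;

    for (i = 0; i < NNODES; i++) {
        ret = can_reserve(&nodes[i], pfn, n, flags);
        if (ret < 0)
            return ret;             /* nothing has been modified yet */
    }
    for (i = 0; i < NNODES; i++)
        do_reserve(&nodes[i], pfn, n);
    return 0;
}

int main(void)
{
    /* Reserve 16 pages straddling the node 0 / node 1 boundary. */
    printf("first:  %d\n", reserve(PAGES_PER_NODE - 8, 16, BOOTMEM_EXCLUSIVE));
    /* A second exclusive request on the same range must fail cleanly. */
    printf("second: %d\n", reserve(PAGES_PER_NODE - 8, 16, BOOTMEM_EXCLUSIVE));
    return 0;
}
```

Because the commit pass cannot fail, a range that straddles both nodes is either reserved on every node or on none, which is exactly the property the cross-node reserve_bootmem() change needs.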
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 2ccea700968f..b6791646143e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
  * might be used for boot-time allocations - or it might get added
  * to the free page pool later on.
  */
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
         unsigned long addr, unsigned long size, int flags)
 {
     unsigned long sidx, eidx;
     unsigned long i;
-    int ret;
+
+    BUG_ON(!size);
+
+    /* out of range, don't hold other */
+    if (addr + size < bdata->node_boot_start ||
+        PFN_DOWN(addr) > bdata->node_low_pfn)
+        return 0;
 
     /*
-     * round up, partially reserved pages are considered
-     * fully reserved.
+     * Round up to index to the range.
      */
+    if (addr > bdata->node_boot_start)
+        sidx = PFN_DOWN(addr - bdata->node_boot_start);
+    else
+        sidx = 0;
+
+    eidx = PFN_UP(addr + size - bdata->node_boot_start);
+    if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+        eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+    for (i = sidx; i < eidx; i++) {
+        if (test_bit(i, bdata->node_bootmem_map)) {
+            if (flags & BOOTMEM_EXCLUSIVE)
+                return -EBUSY;
+        }
+    }
+
+    return 0;
+
+}
+
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+        unsigned long addr, unsigned long size, int flags)
+{
+    unsigned long sidx, eidx;
+    unsigned long i;
+
     BUG_ON(!size);
-    BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
-    BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
-    BUG_ON(addr < bdata->node_boot_start);
 
-    sidx = PFN_DOWN(addr - bdata->node_boot_start);
+    /* out of range */
+    if (addr + size < bdata->node_boot_start ||
+        PFN_DOWN(addr) > bdata->node_low_pfn)
+        return;
+
+    /*
+     * Round up to index to the range.
+     */
+    if (addr > bdata->node_boot_start)
+        sidx = PFN_DOWN(addr - bdata->node_boot_start);
+    else
+        sidx = 0;
+
     eidx = PFN_UP(addr + size - bdata->node_boot_start);
+    if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+        eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-    for (i = sidx; i < eidx; i++)
+    for (i = sidx; i < eidx; i++) {
         if (test_and_set_bit(i, bdata->node_bootmem_map)) {
 #ifdef CONFIG_DEBUG_BOOTMEM
             printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
 #endif
-            if (flags & BOOTMEM_EXCLUSIVE) {
-                ret = -EBUSY;
-                goto err;
-            }
         }
-
-    return 0;
-
-err:
-    /* unreserve memory we accidentally reserved */
-    for (i--; i >= sidx; i--)
-        clear_bit(i, bdata->node_bootmem_map);
-
-    return ret;
+    }
 }
 
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -206,9 +236,11 @@ void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
         unsigned long align, unsigned long goal, unsigned long limit)
 {
-    unsigned long offset, remaining_size, areasize, preferred;
+    unsigned long areasize, preferred;
     unsigned long i, start = 0, incr, eidx, end_pfn;
     void *ret;
+    unsigned long node_boot_start;
+    void *node_bootmem_map;
 
     if (!size) {
         printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
     }
     BUG_ON(align & (align-1));
 
-    if (limit && bdata->node_boot_start >= limit)
-        return NULL;
-
     /* on nodes without memory - bootmem_map is NULL */
     if (!bdata->node_bootmem_map)
         return NULL;
 
+    /* bdata->node_boot_start is supposed to be (12+6) bits aligned on x86_64 ? */
+    node_boot_start = bdata->node_boot_start;
+    node_bootmem_map = bdata->node_bootmem_map;
+    if (align) {
+        node_boot_start = ALIGN(bdata->node_boot_start, align);
+        if (node_boot_start > bdata->node_boot_start)
+            node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
+                PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+    }
+
+    if (limit && node_boot_start >= limit)
+        return NULL;
+
     end_pfn = bdata->node_low_pfn;
     limit = PFN_DOWN(limit);
     if (limit && end_pfn > limit)
         end_pfn = limit;
 
-    eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
-    offset = 0;
-    if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
-        offset = align - (bdata->node_boot_start & (align - 1UL));
-    offset = PFN_DOWN(offset);
+    eidx = end_pfn - PFN_DOWN(node_boot_start);
 
     /*
      * We try to allocate bootmem pages above 'goal'
      * first, then we try to allocate lower pages.
      */
-    if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
-        preferred = goal - bdata->node_boot_start;
+    preferred = 0;
+    if (goal && PFN_DOWN(goal) < end_pfn) {
+        if (goal > node_boot_start)
+            preferred = goal - node_boot_start;
 
-        if (bdata->last_success >= preferred)
+        if (bdata->last_success > node_boot_start &&
+            bdata->last_success - node_boot_start >= preferred)
             if (!limit || (limit && limit > bdata->last_success))
-                preferred = bdata->last_success;
-    } else
-        preferred = 0;
+                preferred = bdata->last_success - node_boot_start;
+    }
 
-    preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+    preferred = PFN_DOWN(ALIGN(preferred, align));
     areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
     incr = align >> PAGE_SHIFT ? : 1;
 
 restart_scan:
-    for (i = preferred; i < eidx; i += incr) {
+    for (i = preferred; i < eidx;) {
         unsigned long j;
-        i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+
+        i = find_next_zero_bit(node_bootmem_map, eidx, i);
         i = ALIGN(i, incr);
         if (i >= eidx)
             break;
-        if (test_bit(i, bdata->node_bootmem_map))
+        if (test_bit(i, node_bootmem_map)) {
+            i += incr;
             continue;
+        }
         for (j = i + 1; j < i + areasize; ++j) {
             if (j >= eidx)
                 goto fail_block;
-            if (test_bit(j, bdata->node_bootmem_map))
+            if (test_bit(j, node_bootmem_map))
                 goto fail_block;
         }
         start = i;
         goto found;
     fail_block:
         i = ALIGN(j, incr);
+        if (i == j)
+            i += incr;
     }
 
-    if (preferred > offset) {
-        preferred = offset;
+    if (preferred > 0) {
+        preferred = 0;
         goto restart_scan;
     }
     return NULL;
 
 found:
-    bdata->last_success = PFN_PHYS(start);
+    bdata->last_success = PFN_PHYS(start) + node_boot_start;
     BUG_ON(start >= eidx);
 
     /*
@@ -289,6 +334,7 @@ found:
      */
     if (align < PAGE_SIZE &&
         bdata->last_offset && bdata->last_pos+1 == start) {
+        unsigned long offset, remaining_size;
         offset = ALIGN(bdata->last_offset, align);
         BUG_ON(offset > PAGE_SIZE);
         remaining_size = PAGE_SIZE - offset;
@@ -297,14 +343,12 @@ found:
             /* last_pos unchanged */
             bdata->last_offset = offset + size;
             ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                       offset +
-                       bdata->node_boot_start);
+                       offset + node_boot_start);
         } else {
             remaining_size = size - remaining_size;
             areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
             ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                       offset +
-                       bdata->node_boot_start);
+                       offset + node_boot_start);
             bdata->last_pos = start + areasize - 1;
             bdata->last_offset = remaining_size;
         }
@@ -312,14 +356,14 @@ found:
     } else {
         bdata->last_pos = start + areasize - 1;
         bdata->last_offset = size & ~PAGE_MASK;
-        ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+        ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
     }
 
     /*
     * Reserve the area now:
     */
     for (i = start; i < start + areasize; i++)
-        if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+        if (unlikely(test_and_set_bit(i, node_bootmem_map)))
             BUG();
     memset(ret, 0, size);
     return ret;
@@ -401,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
                  unsigned long size, int flags)
 {
+    int ret;
+
+    ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+    if (ret < 0)
+        return;
     reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 }
 
@@ -426,7 +475,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
                 int flags)
 {
-    return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+    bootmem_data_t *bdata;
+    int ret;
+
+    list_for_each_entry(bdata, &bdata_list, list) {
+        ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+        if (ret < 0)
+            return ret;
+    }
+    list_for_each_entry(bdata, &bdata_list, list)
+        reserve_bootmem_core(bdata, addr, size, flags);
+
+    return 0;
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
diff --git a/mm/sparse.c b/mm/sparse.c
index 98d6b39c3472..7e9191381f86 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -295,6 +295,9 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
     return NULL;
 }
 
+void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
+{
+}
 /*
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
@@ -304,22 +307,50 @@ void __init sparse_init(void)
     unsigned long pnum;
     struct page *map;
     unsigned long *usemap;
+    unsigned long **usemap_map;
+    int size;
+
+    /*
+     * map is using big page (aka 2M in x86 64 bit)
+     * usemap is less one page (aka 24 bytes)
+     * so alloc 2M (with 2M align) and 24 bytes in turn will
+     * make next 2M slip to one more 2M later.
+     * then in big system, the memory will have a lot of holes...
+     * here try to allocate 2M pages continuously.
+     *
+     * powerpc need to call sparse_init_one_section right after each
+     * sparse_early_mem_map_alloc, so allocate usemap_map at first.
+     */
+    size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
+    usemap_map = alloc_bootmem(size);
+    if (!usemap_map)
+        panic("can not allocate usemap_map\n");
 
     for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
         if (!present_section_nr(pnum))
             continue;
+        usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
+    }
 
-        map = sparse_early_mem_map_alloc(pnum);
-        if (!map)
+    for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+        if (!present_section_nr(pnum))
             continue;
 
-        usemap = sparse_early_usemap_alloc(pnum);
+        usemap = usemap_map[pnum];
         if (!usemap)
             continue;
 
+        map = sparse_early_mem_map_alloc(pnum);
+        if (!map)
+            continue;
+
         sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap);
     }
+
+    vmemmap_populate_print_last();
+
+    free_bootmem(__pa(usemap_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
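
The in-code comment in sparse_init() above describes the motivation for the two-pass rewrite: allocating one ~2M, 2M-aligned mem_map and then one 24-byte usemap per section pushes every following mem_map to the next 2M boundary. The toy model below reproduces that effect with a hypothetical bump allocator; `bump_alloc()`, the four-section count and the 24-byte usemap size are illustrative assumptions, not the kernel allocator.

```c
#include <stdio.h>

#define MB (1024UL * 1024UL)
#define SECTIONS 4

static unsigned long cursor; /* next free byte in a pretend bootmem pool */

/* Allocate size bytes at the next boundary; align must be a power of two. */
static unsigned long bump_alloc(unsigned long size, unsigned long align)
{
    unsigned long addr = (cursor + align - 1) & ~(align - 1);

    cursor = addr + size;
    return addr;
}

int main(void)
{
    int i;

    /* Interleaved: 2M mem_map, 24-byte usemap, repeat per section. */
    cursor = 0;
    for (i = 0; i < SECTIONS; i++) {
        bump_alloc(2 * MB, 2 * MB); /* mem_map for one section */
        bump_alloc(24, 8);          /* its usemap lands right after it,
                                       so the next mem_map slips 2M */
    }
    printf("interleaved: %lu MB used\n", cursor / MB);

    /* Batched: all usemaps first, then the 2M mem_maps back to back. */
    cursor = 0;
    for (i = 0; i < SECTIONS; i++)
        bump_alloc(24, 8);
    for (i = 0; i < SECTIONS; i++)
        bump_alloc(2 * MB, 2 * MB);
    printf("batched:     %lu MB used\n", cursor / MB);
    return 0;
}
```

With four sections the interleaved order consumes 14 MB against 10 MB for the batched order, close to 2 MB wasted per section, which is why the patch allocates all usemaps up front and the mem_maps contiguously.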