Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 158 +++++++++++++++++++++++++++----------------------------
 1 file changed, 79 insertions(+), 79 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9d666df5ef95..ce63d603820f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;
-/*
- * When calculating the number of globally allowed dirty pages, there
- * is a certain number of per-zone reserves that should not be
- * considered dirtyable memory. This is the sum of those reserves
- * over all existing zones that contribute dirtyable memory.
- */
-unsigned long dirty_balance_reserve __read_mostly;
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
@@ -812,7 +805,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
do {
int mt; /* migratetype of the to-be-freed page */
- page = list_entry(list->prev, struct page, lru);
+ page = list_last_entry(list, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
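
For reference, list_last_entry(ptr, type, member) is defined in include/linux/list.h as list_entry((ptr)->prev, type, member), so conversions like the one above are purely cosmetic. A minimal userspace sketch of the equivalence (the list and struct page definitions below are pared-down stand-ins, not the kernel headers):

#include <stddef.h>
#include <stdio.h>

/* Pared-down stand-ins for include/linux/list.h -- illustration only. */
struct list_head { struct list_head *prev, *next; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member)      container_of(ptr, type, member)
#define list_last_entry(ptr, type, member) list_entry((ptr)->prev, type, member)

struct page { int id; struct list_head lru; };

int main(void)
{
	struct page a = { .id = 1 }, b = { .id = 2 };
	struct list_head list = { .prev = &b.lru, .next = &a.lru };

	a.lru = (struct list_head){ .prev = &list, .next = &b.lru };
	b.lru = (struct list_head){ .prev = &a.lru, .next = &list };

	/* Both expressions name the same tail element. */
	printf("%d %d\n",
	       list_entry(list.prev, struct page, lru)->id,
	       list_last_entry(&list, struct page, lru)->id); /* "2 2" */
	return 0;
}
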
@@ -1417,11 +1410,10 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
/* Find a page of the appropriate size in the preferred list */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
area = &(zone->free_area[current_order]);
- if (list_empty(&area->free_list[migratetype]))
- continue;
-
- page = list_entry(area->free_list[migratetype].next,
+ page = list_first_entry_or_null(&area->free_list[migratetype],
struct page, lru);
+ if (!page)
+ continue;
list_del(&page->lru);
rmv_page_order(page);
area->nr_free--;
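
list_first_entry_or_null() returns NULL for an empty list and the first entry otherwise, which is what lets the list_empty() test and the fetch collapse into a single call here (the same conversion is applied to unreserve_highatomic_pageblock() in the next hunk). A pared-down userspace sketch; the real macro in include/linux/list.h is a touch more careful, but the semantics are these:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define list_empty(head) ((head)->next == (head))
/* Simplified: NULL on an empty list, the first entry otherwise. */
#define list_first_entry_or_null(head, type, member) \
	(list_empty(head) ? (type *)NULL : list_entry((head)->next, type, member))

struct page { int id; struct list_head lru; };

int main(void)
{
	struct list_head empty = { .prev = &empty, .next = &empty };
	struct page *page;

	/* Old pattern: test list_empty(), then fetch with list_entry().
	 * New pattern: fetch first, then test the result for NULL. */
	page = list_first_entry_or_null(&empty, struct page, lru);
	printf("%s\n", page ? "got a page" : "empty list"); /* "empty list" */
	return 0;
}
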
@@ -1700,12 +1692,12 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
for (order = 0; order < MAX_ORDER; order++) {
struct free_area *area = &(zone->free_area[order]);
- if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ page = list_first_entry_or_null(
+ &area->free_list[MIGRATE_HIGHATOMIC],
+ struct page, lru);
+ if (!page)
continue;
- page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
- struct page, lru);
-
/*
* It should never happen but changes to locking could
* inadvertently allow a per-cpu drain to add pages
@@ -1753,7 +1745,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
if (fallback_mt == -1)
continue;
- page = list_entry(area->free_list[fallback_mt].next,
+ page = list_first_entry(&area->free_list[fallback_mt],
struct page, lru);
if (can_steal)
steal_suitable_fallback(zone, page, start_migratetype);
@@ -1788,7 +1780,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
* Call me with the zone->lock already held.
*/
static struct page *__rmqueue(struct zone *zone, unsigned int order,
- int migratetype, gfp_t gfp_flags)
+ int migratetype)
{
struct page *page;
@@ -1818,7 +1810,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype, 0);
+ struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
break;
@@ -1988,7 +1980,7 @@ void mark_free_pages(struct zone *zone)
unsigned long pfn, max_zone_pfn;
unsigned long flags;
unsigned int order, t;
- struct list_head *curr;
+ struct page *page;
if (zone_is_empty(zone))
return;
@@ -1998,17 +1990,17 @@ void mark_free_pages(struct zone *zone)
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
-
+ page = pfn_to_page(pfn);
if (!swsusp_page_is_forbidden(page))
swsusp_unset_page_free(page);
}
for_each_migratetype_order(order, t) {
- list_for_each(curr, &zone->free_area[order].free_list[t]) {
+ list_for_each_entry(page,
+ &zone->free_area[order].free_list[t], lru) {
unsigned long i;
- pfn = page_to_pfn(list_entry(curr, struct page, lru));
+ pfn = page_to_pfn(page);
for (i = 0; i < (1UL << order); i++)
swsusp_set_page_free(pfn_to_page(pfn + i));
}
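
list_for_each_entry() makes the cursor the containing object itself, so the explicit list_entry() conversion disappears from the loop body. A self-contained sketch of that shape (simplified macro; uses typeof, as the kernel does, so it needs GCC or Clang):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
/* Simplified list_for_each_entry(): the cursor is the entry, not the node. */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_entry((head)->next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.next, typeof(*pos), member))

struct page { unsigned long pfn; struct list_head lru; };

int main(void)
{
	struct page a = { .pfn = 100 }, b = { .pfn = 200 };
	struct list_head head = { .prev = &b.lru, .next = &a.lru };
	struct page *page;

	a.lru = (struct list_head){ .prev = &head, .next = &b.lru };
	b.lru = (struct list_head){ .prev = &a.lru, .next = &head };

	/* No list_entry() in the body -- the cursor already is a page. */
	list_for_each_entry(page, &head, lru)
		printf("pfn %lu\n", page->pfn); /* 100, then 200 */
	return 0;
}
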
@@ -2212,9 +2204,9 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
}
if (cold)
- page = list_entry(list->prev, struct page, lru);
+ page = list_last_entry(list, struct page, lru);
else
- page = list_entry(list->next, struct page, lru);
+ page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
pcp->count--;
@@ -2241,7 +2233,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
if (!page)
- page = __rmqueue(zone, order, migratetype, gfp_flags);
+ page = __rmqueue(zone, order, migratetype);
spin_unlock(&zone->lock);
if (!page)
goto failed;
@@ -2740,8 +2732,21 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+ if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
*did_some_progress = 1;
+
+ if (gfp_mask & __GFP_NOFAIL) {
+ page = get_page_from_freelist(gfp_mask, order,
+ ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
+ /*
+ * fall back to ignoring the cpuset restriction if our
+ * nodes are depleted
+ */
+ if (!page)
+ page = get_page_from_freelist(gfp_mask, order,
+ ALLOC_NO_WATERMARKS, ac);
+ }
+ }
out:
mutex_unlock(&oom_lock);
return page;
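
The ordering of the new __GFP_NOFAIL last resort matters: first retry with watermarks ignored but the cpuset node mask still honoured (ALLOC_CPUSET), and only if that comes back empty retry with the cpuset restriction dropped. A schematic userspace sketch of that two-step fallback; get_page_from_freelist() here is a stub standing in for the real zonelist walk, and the flag values are illustrative:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's internal alloc flags. */
#define ALLOC_NO_WATERMARKS	0x04
#define ALLOC_CPUSET		0x40

struct page;

/* Hypothetical stub: the real get_page_from_freelist() walks the zonelist. */
static struct page *get_page_from_freelist(unsigned int gfp_mask, int order,
					   int alloc_flags)
{
	(void)gfp_mask; (void)order; (void)alloc_flags;
	return NULL; /* pretend even reserves within the cpuset are depleted */
}

static struct page *nofail_last_resort(unsigned int gfp_mask, int order)
{
	/* Step 1: ignore watermarks, but stay within the cpuset's nodes. */
	struct page *page = get_page_from_freelist(gfp_mask, order,
					ALLOC_NO_WATERMARKS | ALLOC_CPUSET);
	/* Step 2: only if those nodes are depleted, drop the restriction. */
	if (!page)
		page = get_page_from_freelist(gfp_mask, order,
					ALLOC_NO_WATERMARKS);
	return page;
}

int main(void)
{
	printf("%p\n", (void *)nofail_last_resort(0, 0));
	return 0;
}
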
@@ -2876,28 +2881,6 @@ retry:
return page;
}
-/*
- * This is called in the allocator slow-path if the allocation request is of
- * sufficient urgency to ignore watermarks and take other desperate measures
- */
-static inline struct page *
-__alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
- const struct alloc_context *ac)
-{
- struct page *page;
-
- do {
- page = get_page_from_freelist(gfp_mask, order,
- ALLOC_NO_WATERMARKS, ac);
-
- if (!page && gfp_mask & __GFP_NOFAIL)
- wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC,
- HZ/50);
- } while (!page && (gfp_mask & __GFP_NOFAIL));
-
- return page;
-}
-
static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac)
{
struct zoneref *z;
@@ -3042,28 +3025,36 @@ retry:
* allocations are system rather than user orientated
*/
ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
-
- page = __alloc_pages_high_priority(gfp_mask, order, ac);
-
- if (page) {
+ page = get_page_from_freelist(gfp_mask, order,
+ ALLOC_NO_WATERMARKS, ac);
+ if (page)
goto got_pg;
- }
}
/* Caller is not willing to reclaim, we can't balance anything */
if (!can_direct_reclaim) {
/*
- * All existing users of the deprecated __GFP_NOFAIL are
- * blockable, so warn of any new users that actually allow this
- * type of allocation to fail.
+ * All existing users of __GFP_NOFAIL are blockable, so warn
+ * of any new users that actually allow this type of allocation
+ * to fail.
*/
WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
goto nopage;
}
/* Avoid recursion of direct reclaim */
- if (current->flags & PF_MEMALLOC)
+ if (current->flags & PF_MEMALLOC) {
+ /*
+ * __GFP_NOFAIL request from this context is rather bizarre
+ * because we cannot reclaim anything; we can only loop, waiting
+ * for somebody else to do the work for us.
+ */
+ if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
+ cond_resched();
+ goto retry;
+ }
goto nopage;
+ }
/* Avoid allocations with no watermarks from looping endlessly */
if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
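
Under PF_MEMALLOC the allocator must not recurse into direct reclaim, so a __GFP_NOFAIL request in that context can only yield the CPU and retry until memory is freed elsewhere, which is exactly what the cond_resched()/goto retry pair above does. A schematic userspace sketch of that loop shape (try_alloc() and the nofail flag are hypothetical stand-ins; sched_yield() plays the role of cond_resched()):

#include <stdio.h>
#include <sched.h>

static int attempts;

/* Hypothetical helper: succeeds once memory "appears" on the third try. */
static void *try_alloc(void)
{
	return ++attempts < 3 ? NULL : (void *)"page";
}

static void *alloc_in_memalloc_context(int nofail)
{
	void *page;
retry:
	page = try_alloc();
	if (page)
		return page;
	if (!nofail)
		return NULL;	/* ordinary request: give up (nopage) */
	sched_yield();		/* stands in for cond_resched() */
	goto retry;
}

int main(void)
{
	printf("%s after %d attempts\n",
	       (char *)alloc_in_memalloc_context(1), attempts);
	return 0;
}
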
@@ -3402,7 +3393,8 @@ EXPORT_SYMBOL(__free_page_frag);
/*
* alloc_kmem_pages charges newly allocated pages to the kmem resource counter
- * of the current memory cgroup.
+ * of the current memory cgroup if __GFP_ACCOUNT is set; otherwise it is
+ * equivalent to alloc_pages.
*
* It should be used when the caller would like to use kmalloc, but since the
* allocation is large, it has to fall back to the page allocator.
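
A short usage fragment under the clarified rule, assuming this kernel's alloc_kmem_pages()/__free_kmem_pages() pair (kernel-context code, shown for illustration only):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Fragment only: a large buffer charged to the current memcg. Without
 * __GFP_ACCOUNT the call would behave exactly like alloc_pages(). */
static void *big_buf_alloc(unsigned int order)
{
	struct page *page = alloc_kmem_pages(GFP_KERNEL | __GFP_ACCOUNT, order);

	return page ? page_address(page) : NULL;
}

static void big_buf_free(void *buf, unsigned int order)
{
	if (buf)
		__free_kmem_pages(virt_to_page(buf), order);
}
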
@@ -4147,8 +4139,7 @@ static void set_zonelist_order(void)
static void build_zonelists(pg_data_t *pgdat)
{
- int j, node, load;
- enum zone_type i;
+ int i, node, load;
nodemask_t used_mask;
int local_node, prev_node;
struct zonelist *zonelist;
@@ -4168,7 +4159,7 @@ static void build_zonelists(pg_data_t *pgdat)
nodes_clear(used_mask);
memset(node_order, 0, sizeof(node_order));
- j = 0;
+ i = 0;
while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
/*
@@ -4185,12 +4176,12 @@ static void build_zonelists(pg_data_t *pgdat)
if (order == ZONELIST_ORDER_NODE)
build_zonelists_in_node_order(pgdat, node);
else
- node_order[j++] = node; /* remember order */
+ node_order[i++] = node; /* remember order */
}
if (order == ZONELIST_ORDER_ZONE) {
/* calculate node order -- i.e., DMA last! */
- build_zonelists_in_zone_order(pgdat, j);
+ build_zonelists_in_zone_order(pgdat, i);
}
build_thisnode_zonelists(pgdat);
@@ -5956,20 +5947,12 @@ static void calculate_totalreserve_pages(void)
if (max > zone->managed_pages)
max = zone->managed_pages;
+
+ zone->totalreserve_pages = max;
+
reserve_pages += max;
- /*
- * Lowmem reserves are not available to
- * GFP_HIGHUSER page cache allocations and
- * kswapd tries to balance zones to their high
- * watermark. As a result, neither should be
- * regarded as dirtyable memory, to prevent a
- * situation where reclaim has to clean pages
- * in order to balance the zones.
- */
- zone->dirty_balance_reserve = max;
}
}
- dirty_balance_reserve = reserve_pages;
totalreserve_pages = reserve_pages;
}
@@ -6724,8 +6707,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
if (ret)
return ret;
+ /*
+ * In case of -EBUSY, we'd like to know which page causes the problem.
+ * So, just fall through. We will check it in test_pages_isolated().
+ */
ret = __alloc_contig_migrate_range(&cc, start, end);
- if (ret)
+ if (ret && ret != -EBUSY)
goto done;
/*
@@ -6752,12 +6739,25 @@ int alloc_contig_range(unsigned long start, unsigned long end,
outer_start = start;
while (!PageBuddy(pfn_to_page(outer_start))) {
if (++order >= MAX_ORDER) {
- ret = -EBUSY;
- goto done;
+ outer_start = start;
+ break;
}
outer_start &= ~0UL << order;
}
+ if (outer_start != start) {
+ order = page_order(pfn_to_page(outer_start));
+
+ /*
+ * outer_start page could be a small-order buddy page that
+ * doesn't include the start page. Adjust outer_start in
+ * this case so the failed page is reported properly by the
+ * tracepoint in test_pages_isolated()
+ */
+ if (outer_start + (1UL << order) <= start)
+ outer_start = start;
+ }
+
/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, false)) {
pr_info("%s: [%lx, %lx) PFNs busy\n",
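
For context on the outer_start adjustment above: outer_start &= ~0UL << order clears the low order bits, aligning the pfn down to an order-sized buddy boundary, and the later guard resets outer_start when that buddy's span [outer_start, outer_start + (1 << order)) ends at or before start. A small arithmetic check with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned long start = 1028;	/* first pfn of the requested range */
	unsigned long outer_start = start;
	unsigned int order = 0;

	/* Walk down to a buddy-aligned pfn, as the loop above does;
	 * pretend PageBuddy() first succeeds after the order-3 mask. */
	while (order < 3) {
		++order;
		outer_start &= ~0UL << order;
	}
	printf("aligned outer_start = %lu\n", outer_start);	/* 1024 */

	/* Suppose page_order(pfn_to_page(outer_start)) is only 2: the buddy
	 * spans [1024, 1028), which excludes start, so report start itself. */
	order = 2;
	if (outer_start + (1UL << order) <= start)
		outer_start = start;
	printf("reported outer_start = %lu\n", outer_start);	/* 1028 */
	return 0;
}
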