summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--mm/page_alloc.c72
-rw-r--r--mm/vmstat.c1
3 files changed, 64 insertions, 10 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index af4a3b77a8de..ac1ea796ec0f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -105,6 +105,7 @@ struct zone_padding {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
+ NR_ALLOC_BATCH,
NR_LRU_BASE,
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
NR_ACTIVE_ANON, /* " " " " " */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9884aa0f233a..544d19d681a2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1551,6 +1551,7 @@ again:
get_pageblock_migratetype(page));
}
+ __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);
@@ -1817,6 +1818,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
}
+static bool zone_local(struct zone *local_zone, struct zone *zone)
+{
+ return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE;
+}
+
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
@@ -1854,6 +1860,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
{
}
+static bool zone_local(struct zone *local_zone, struct zone *zone)
+{
+ return true;
+}
+
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return true;
@@ -1901,6 +1912,26 @@ zonelist_scan:
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;
/*
+ * Distribute pages in proportion to the individual
+ * zone size to ensure fair page aging. The zone a
+ * page was allocated in should have no effect on the
+ * time the page has in memory before being reclaimed.
+ *
+ * When zone_reclaim_mode is enabled, try to stay in
+ * local zones in the fastpath. If that fails, the
+ * slowpath is entered, which will do another pass
+ * starting with the local zones, but ultimately fall
+ * back to remote zones that do not partake in the
+ * fairness round-robin cycle of this zonelist.
+ */
+ if (alloc_flags & ALLOC_WMARK_LOW) {
+ if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+ continue;
+ if (zone_reclaim_mode &&
+ !zone_local(preferred_zone, zone))
+ continue;
+ }
+ /*
* When allocating a page cache page for writing, we
* want to get it from a zone that is within its dirty
* limit, such that no single zone holds more than its
@@ -2346,16 +2377,30 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
return page;
}
-static inline
-void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
- enum zone_type high_zoneidx,
- enum zone_type classzone_idx)
+static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist,
+ enum zone_type high_zoneidx,
+ struct zone *preferred_zone)
{
struct zoneref *z;
struct zone *zone;
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
- wakeup_kswapd(zone, order, classzone_idx);
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ if (!(gfp_mask & __GFP_NO_KSWAPD))
+ wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+ /*
+ * Only reset the batches of zones that were actually
+ * considered in the fast path, we don't want to
+ * thrash fairness information for zones that are not
+ * actually part of this zonelist's round-robin cycle.
+ */
+ if (zone_reclaim_mode && !zone_local(preferred_zone, zone))
+ continue;
+ mod_zone_page_state(zone, NR_ALLOC_BATCH,
+ high_wmark_pages(zone) -
+ low_wmark_pages(zone) -
+ zone_page_state(zone, NR_ALLOC_BATCH));
+ }
}
static inline int
@@ -2451,9 +2496,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto nopage;
restart:
- if (!(gfp_mask & __GFP_NO_KSWAPD))
- wake_all_kswapd(order, zonelist, high_zoneidx,
- zone_idx(preferred_zone));
+ prepare_slowpath(gfp_mask, order, zonelist,
+ high_zoneidx, preferred_zone);
/*
* OK, we're below the kswapd watermark and have kicked background
@@ -4753,8 +4797,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
spin_lock_init(&zone->lru_lock);
zone_seqlock_init(zone);
zone->zone_pgdat = pgdat;
-
zone_pcp_init(zone);
+
+ /* For bootup, initialized properly in watermark setup */
+ mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
+
lruvec_init(&zone->lruvec);
if (!size)
continue;
@@ -5525,6 +5572,11 @@ static void __setup_per_zone_wmarks(void)
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
+ __mod_zone_page_state(zone, NR_ALLOC_BATCH,
+ high_wmark_pages(zone) -
+ low_wmark_pages(zone) -
+ zone_page_state(zone, NR_ALLOC_BATCH));
+
setup_zone_migrate_reserve(zone);
spin_unlock_irqrestore(&zone->lock, flags);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ca06e9653827..8a8da1f9b044 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -703,6 +703,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
const char * const vmstat_text[] = {
/* Zoned VM counters */
"nr_free_pages",
+ "nr_alloc_batch",
"nr_inactive_anon",
"nr_active_anon",
"nr_inactive_file",