summaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c234
1 files changed, 116 insertions, 118 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 532a2a750952..7bb23ff229b6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat)
pgdat_reclaimable_pages(pgdat) * 6;
}
-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
+/**
+ * lruvec_lru_size - Returns the number of pages on the given LRU list.
+ * @lruvec: lru vector
+ * @lru: lru to use
+ * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list)
+ */
+unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
{
+ unsigned long lru_size;
+ int zid;
+
if (!mem_cgroup_disabled())
- return mem_cgroup_get_lru_size(lruvec, lru);
+ lru_size = mem_cgroup_get_lru_size(lruvec, lru);
+ else
+ lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
- return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
-}
+ for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid];
+ unsigned long size;
-unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
- int zone_idx)
-{
- if (!mem_cgroup_disabled())
- return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
+ if (!managed_zone(zone))
+ continue;
+
+ if (!mem_cgroup_disabled())
+ size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+ else
+ size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid],
+ NR_ZONE_LRU_BASE + lru);
+ lru_size -= min(size, lru_size);
+ }
+
+ return lru_size;
- return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
- NR_ZONE_LRU_BASE + lru);
}
/*
@@ -912,6 +929,17 @@ static void page_check_dirty_writeback(struct page *page,
mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
}
+struct reclaim_stat {
+ unsigned nr_dirty;
+ unsigned nr_unqueued_dirty;
+ unsigned nr_congested;
+ unsigned nr_writeback;
+ unsigned nr_immediate;
+ unsigned nr_activate;
+ unsigned nr_ref_keep;
+ unsigned nr_unmap_fail;
+};
+
/*
* shrink_page_list() returns the number of reclaimed pages
*/
@@ -919,22 +947,20 @@ static unsigned long shrink_page_list(struct list_head *page_list,
struct pglist_data *pgdat,
struct scan_control *sc,
enum ttu_flags ttu_flags,
- unsigned long *ret_nr_dirty,
- unsigned long *ret_nr_unqueued_dirty,
- unsigned long *ret_nr_congested,
- unsigned long *ret_nr_writeback,
- unsigned long *ret_nr_immediate,
+ struct reclaim_stat *stat,
bool force_reclaim)
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
int pgactivate = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
- unsigned long nr_reclaimed = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;
+ unsigned nr_unqueued_dirty = 0;
+ unsigned nr_dirty = 0;
+ unsigned nr_congested = 0;
+ unsigned nr_reclaimed = 0;
+ unsigned nr_writeback = 0;
+ unsigned nr_immediate = 0;
+ unsigned nr_ref_keep = 0;
+ unsigned nr_unmap_fail = 0;
cond_resched();
@@ -1073,6 +1099,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
case PAGEREF_ACTIVATE:
goto activate_locked;
case PAGEREF_KEEP:
+ nr_ref_keep++;
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
@@ -1110,6 +1137,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
(ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
(ttu_flags | TTU_BATCH_FLUSH))) {
case SWAP_FAIL:
+ nr_unmap_fail++;
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
@@ -1276,11 +1304,16 @@ keep:
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
- *ret_nr_dirty += nr_dirty;
- *ret_nr_congested += nr_congested;
- *ret_nr_unqueued_dirty += nr_unqueued_dirty;
- *ret_nr_writeback += nr_writeback;
- *ret_nr_immediate += nr_immediate;
+ if (stat) {
+ stat->nr_dirty = nr_dirty;
+ stat->nr_congested = nr_congested;
+ stat->nr_unqueued_dirty = nr_unqueued_dirty;
+ stat->nr_writeback = nr_writeback;
+ stat->nr_immediate = nr_immediate;
+ stat->nr_activate = pgactivate;
+ stat->nr_ref_keep = nr_ref_keep;
+ stat->nr_unmap_fail = nr_unmap_fail;
+ }
return nr_reclaimed;
}
@@ -1292,7 +1325,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
- unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
+ unsigned long ret;
struct page *page, *next;
LIST_HEAD(clean_pages);
@@ -1305,8 +1338,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
}
ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
- TTU_UNMAP|TTU_IGNORE_ACCESS,
- &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+ TTU_UNMAP|TTU_IGNORE_ACCESS, NULL, true);
list_splice(&clean_pages, page_list);
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
return ret;
@@ -1437,6 +1469,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long nr_taken = 0;
unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
+ unsigned long skipped = 0, total_skipped = 0;
unsigned long scan, nr_pages;
LIST_HEAD(pages_skipped);
@@ -1488,14 +1521,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
*/
if (!list_empty(&pages_skipped)) {
int zid;
- unsigned long total_skipped = 0;
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
if (!nr_skipped[zid])
continue;
__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
- total_skipped += nr_skipped[zid];
+ skipped += nr_skipped[zid];
}
/*
@@ -1503,13 +1535,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* close to unreclaimable. If the LRU list is empty, account
* skipped pages as a full scan.
*/
- scan += list_empty(src) ? total_skipped : total_skipped >> 2;
+ total_skipped = list_empty(src) ? skipped : skipped >> 2;
list_splice(&pages_skipped, src);
}
- *nr_scanned = scan;
- trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
- nr_taken, mode, is_file_lru(lru));
+ *nr_scanned = scan + total_skipped;
+ trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
+ scan, skipped, nr_taken, mode, lru);
update_lru_sizes(lruvec, lru, nr_zone_taken);
return nr_taken;
}
@@ -1669,30 +1701,6 @@ static int current_may_throttle(void)
bdi_write_congested(current->backing_dev_info);
}
-static bool inactive_reclaimable_pages(struct lruvec *lruvec,
- struct scan_control *sc, enum lru_list lru)
-{
- int zid;
- struct zone *zone;
- int file = is_file_lru(lru);
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-
- if (!global_reclaim(sc))
- return true;
-
- for (zid = sc->reclaim_idx; zid >= 0; zid--) {
- zone = &pgdat->node_zones[zid];
- if (!managed_zone(zone))
- continue;
-
- if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
- LRU_FILE * file) >= SWAP_CLUSTER_MAX)
- return true;
- }
-
- return false;
-}
-
/*
* shrink_inactive_list() is a helper for shrink_node(). It returns the number
* of reclaimed pages
@@ -1705,19 +1713,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;
+ struct reclaim_stat stat = {};
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
- if (!inactive_reclaimable_pages(lruvec, sc, lru))
- return 0;
-
while (unlikely(too_many_isolated(pgdat, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1754,9 +1755,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
return 0;
nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP,
- &nr_dirty, &nr_unqueued_dirty, &nr_congested,
- &nr_writeback, &nr_immediate,
- false);
+ &stat, false);
spin_lock_irq(&pgdat->lru_lock);
@@ -1790,7 +1789,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* of pages under pages flagged for immediate reclaim and stall if any
* are encountered in the nr_immediate check below.
*/
- if (nr_writeback && nr_writeback == nr_taken)
+ if (stat.nr_writeback && stat.nr_writeback == nr_taken)
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
@@ -1802,7 +1801,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* Tag a zone as congested if all the dirty pages scanned were
* backed by a congested BDI and wait_iff_congested will stall.
*/
- if (nr_dirty && nr_dirty == nr_congested)
+ if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
set_bit(PGDAT_CONGESTED, &pgdat->flags);
/*
@@ -1811,7 +1810,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* the pgdat PGDAT_DIRTY and kswapd will start writing pages from
* reclaim context.
*/
- if (nr_unqueued_dirty == nr_taken)
+ if (stat.nr_unqueued_dirty == nr_taken)
set_bit(PGDAT_DIRTY, &pgdat->flags);
/*
@@ -1820,7 +1819,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* that pages are cycling through the LRU faster than
* they are written so also forcibly stall.
*/
- if (nr_immediate && current_may_throttle())
+ if (stat.nr_immediate && current_may_throttle())
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
@@ -1835,6 +1834,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
nr_scanned, nr_reclaimed,
+ stat.nr_dirty, stat.nr_writeback,
+ stat.nr_congested, stat.nr_immediate,
+ stat.nr_activate, stat.nr_ref_keep,
+ stat.nr_unmap_fail,
sc->priority, file);
return nr_reclaimed;
}
@@ -1855,17 +1858,19 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
*
* The downside is that we have to touch page->_refcount against each page.
* But we had to alter page->flags anyway.
+ *
+ * Returns the number of pages moved to the given lru.
*/
-static void move_active_pages_to_lru(struct lruvec *lruvec,
+static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
struct list_head *list,
struct list_head *pages_to_free,
enum lru_list lru)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- unsigned long pgmoved = 0;
struct page *page;
int nr_pages;
+ int nr_moved = 0;
while (!list_empty(list)) {
page = lru_to_page(list);
@@ -1877,7 +1882,6 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
nr_pages = hpage_nr_pages(page);
update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
- pgmoved += nr_pages;
if (put_page_testzero(page)) {
__ClearPageLRU(page);
@@ -1891,11 +1895,15 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
spin_lock_irq(&pgdat->lru_lock);
} else
list_add(&page->lru, pages_to_free);
+ } else {
+ nr_moved += nr_pages;
}
}
if (!is_active_lru(lru))
- __count_vm_events(PGDEACTIVATE, pgmoved);
+ __count_vm_events(PGDEACTIVATE, nr_moved);
+
+ return nr_moved;
}
static void shrink_active_list(unsigned long nr_to_scan,
@@ -1911,7 +1919,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_inactive);
struct page *page;
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
- unsigned long nr_rotated = 0;
+ unsigned nr_deactivate, nr_activate;
+ unsigned nr_rotated = 0;
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -1989,13 +1998,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
- move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
+ nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+ nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&pgdat->lru_lock);
mem_cgroup_uncharge_list(&l_hold);
free_hot_cold_page_list(&l_hold, true);
+ trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
+ nr_deactivate, nr_rotated, sc->priority, file);
}
/*
@@ -2025,14 +2036,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct scan_control *sc)
+ struct scan_control *sc, bool trace)
{
unsigned long inactive_ratio;
- unsigned long inactive;
- unsigned long active;
+ unsigned long inactive, active;
+ enum lru_list inactive_lru = file * LRU_FILE;
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
unsigned long gb;
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- int zid;
/*
* If we don't have swap space, anonymous page deactivation
@@ -2041,27 +2051,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
if (!file && !total_swap_pages)
return false;
- inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
- active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
-
- /*
- * For zone-constrained allocations, it is necessary to check if
- * deactivations are required for lowmem to be reclaimed. This
- * calculates the inactive/active pages available in eligible zones.
- */
- for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
- struct zone *zone = &pgdat->node_zones[zid];
- unsigned long inactive_zone, active_zone;
-
- if (!managed_zone(zone))
- continue;
-
- inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
- active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
-
- inactive -= min(inactive, inactive_zone);
- active -= min(active, active_zone);
- }
+ inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
+ active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
@@ -2069,6 +2060,13 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
else
inactive_ratio = 1;
+ if (trace)
+ trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
+ sc->reclaim_idx,
+ lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+ lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+ inactive_ratio, file);
+
return inactive * inactive_ratio < active;
}
@@ -2076,7 +2074,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
@@ -2207,8 +2205,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, sc) &&
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
+ if (!inactive_list_is_low(lruvec, true, sc, false) &&
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -2234,10 +2232,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* anon in [0], file in [1]
*/
- anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) +
- lruvec_lru_size(lruvec, LRU_INACTIVE_ANON);
- file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
+ anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
+ lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
+ file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
spin_lock_irq(&pgdat->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
@@ -2275,7 +2273,7 @@ out:
unsigned long size;
unsigned long scan;
- size = lruvec_lru_size(lruvec, lru);
+ size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
scan = size >> sc->priority;
if (!scan && pass && force_scan)
@@ -2432,7 +2430,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, sc))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
@@ -3082,7 +3080,7 @@ static void age_active_anon(struct pglist_data *pgdat,
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, sc))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);