diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 94 |
1 files changed, 74 insertions, 20 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 7a30150b4dee..e8cb983a8c84 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2006,6 +2006,8 @@ static void shrink_active_list(unsigned long nr_to_scan, * Both inactive lists should also be large enough that each inactive * page has a chance to be referenced again before it is reclaimed. * + * If that fails and refaulting is observed, the inactive list grows. + * * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages * on this LRU, maintained by the pageout code. A zone->inactive_ratio * of 3 means 3:1 or 25% of the pages are kept on the inactive list. @@ -2022,12 +2024,15 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct scan_control *sc, bool trace) + struct mem_cgroup *memcg, + struct scan_control *sc, bool actual_reclaim) { - unsigned long inactive_ratio; - unsigned long inactive, active; - enum lru_list inactive_lru = file * LRU_FILE; enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + enum lru_list inactive_lru = file * LRU_FILE; + unsigned long inactive, active; + unsigned long inactive_ratio; + unsigned long refaults; unsigned long gb; /* @@ -2040,27 +2045,43 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb) - inactive_ratio = int_sqrt(10 * gb); + if (memcg) + refaults = mem_cgroup_read_stat(memcg, + MEMCG_WORKINGSET_ACTIVATE); else - inactive_ratio = 1; + refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); + + /* + * When refaults are being observed, it means a new workingset + * is being established. Disable active list protection to get + * rid of the stale workingset quickly. + */ + if (file && actual_reclaim && lruvec->refaults != refaults) { + inactive_ratio = 0; + } else { + gb = (inactive + active) >> (30 - PAGE_SHIFT); + if (gb) + inactive_ratio = int_sqrt(10 * gb); + else + inactive_ratio = 1; + } - if (trace) - trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id, - sc->reclaim_idx, - lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, - lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, - inactive_ratio, file); + if (actual_reclaim) + trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx, + lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, + lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, + inactive_ratio, file); return inactive * inactive_ratio < active; } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct scan_control *sc) + struct lruvec *lruvec, struct mem_cgroup *memcg, + struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), + memcg, sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2169,7 +2190,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, sc, false) && + if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2320,7 +2341,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); + lruvec, memcg, sc); } } @@ -2387,7 +2408,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, sc, true)) + if (inactive_list_is_low(lruvec, false, memcg, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2703,6 +2724,26 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) sc->gfp_mask = orig_mask; } +static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) +{ + struct mem_cgroup *memcg; + + memcg = mem_cgroup_iter(root_memcg, NULL, NULL); + do { + unsigned long refaults; + struct lruvec *lruvec; + + if (memcg) + refaults = mem_cgroup_read_stat(memcg, + MEMCG_WORKINGSET_ACTIVATE); + else + refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); + + lruvec = mem_cgroup_lruvec(pgdat, memcg); + lruvec->refaults = refaults; + } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); +} + /* * This is the main entry point to direct page reclaim. * @@ -2723,6 +2764,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int initial_priority = sc->priority; + pg_data_t *last_pgdat; + struct zoneref *z; + struct zone *zone; retry: delayacct_freepages_start(); @@ -2749,6 +2793,15 @@ retry: sc->may_writepage = 1; } while (--sc->priority >= 0); + last_pgdat = NULL; + for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, + sc->nodemask) { + if (zone->zone_pgdat == last_pgdat) + continue; + last_pgdat = zone->zone_pgdat; + snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); + } + delayacct_freepages_end(); if (sc->nr_reclaimed) @@ -3033,7 +3086,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, sc, true)) + if (inactive_list_is_low(lruvec, false, memcg, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); @@ -3280,6 +3333,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) pgdat->kswapd_failures++; out: + snapshot_refaults(NULL, pgdat); /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account. If another caller |