diff options
-rw-r--r-- | include/linux/memcontrol.h | 10 | ||||
-rw-r--r-- | mm/memcontrol.c | 163 | ||||
-rw-r--r-- | mm/vmscan.c | 62 |
3 files changed, 60 insertions, 175 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c416092e324..4b78661c68d0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -180,9 +180,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, mem_cgroup_update_page_stat(page, idx, -1); } -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask, - unsigned long *total_scanned); +bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg); void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, @@ -359,11 +357,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, } static inline -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) +bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg) { - return 0; + return false; } static inline void mem_cgroup_split_huge_fixup(struct page *head) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fe6b9f96abdd..6c32271a31c5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2004,57 +2004,28 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) } #endif -static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, - struct zone *zone, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - struct mem_cgroup *victim = NULL; - int total = 0; - int loop = 0; - unsigned long excess; - unsigned long nr_scanned; - struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, - .priority = 0, - }; - - excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; - - while (1) { - victim = mem_cgroup_iter(root_memcg, victim, &reclaim); - if (!victim) { - loop++; - if (loop >= 2) { - /* - * If we have not been able to reclaim - * anything, it might because there are - * no reclaimable pages under this hierarchy - */ - if (!total) - break; - /* - * We want to do more targeted reclaim. - * excess >> 2 is not to excessive so as to - * reclaim too much, nor too less that we keep - * coming back to reclaim from this cgroup - */ - if (total >= (excess >> 2) || - (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) - break; - } - continue; - } - if (!mem_cgroup_reclaimable(victim, false)) - continue; - total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, - zone, &nr_scanned); - *total_scanned += nr_scanned; - if (!res_counter_soft_limit_excess(&root_memcg->res)) - break; +/* + * A group is eligible for the soft limit reclaim if it is + * a) is over its soft limit + * b) any parent up the hierarchy is over its soft limit + */ +bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg) +{ + struct mem_cgroup *parent = memcg; + + if (res_counter_soft_limit_excess(&memcg->res)) + return true; + + /* + * If any parent up the hierarchy is over its soft limit then we + * have to obey and reclaim from this group as well. + */ + while((parent = parent_mem_cgroup(parent))) { + if (res_counter_soft_limit_excess(&parent->res)) + return true; } - mem_cgroup_iter_break(root_memcg, victim); - return total; + + return false; } /* @@ -4727,98 +4698,6 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, return ret; } -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - unsigned long nr_reclaimed = 0; - struct mem_cgroup_per_zone *mz, *next_mz = NULL; - unsigned long reclaimed; - int loop = 0; - struct mem_cgroup_tree_per_zone *mctz; - unsigned long long excess; - unsigned long nr_scanned; - - if (order > 0) - return 0; - - mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone)); - /* - * This loop can run a while, specially if mem_cgroup's continuously - * keep exceeding their soft limit and putting the system under - * pressure - */ - do { - if (next_mz) - mz = next_mz; - else - mz = mem_cgroup_largest_soft_limit_node(mctz); - if (!mz) - break; - - nr_scanned = 0; - reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone, - gfp_mask, &nr_scanned); - nr_reclaimed += reclaimed; - *total_scanned += nr_scanned; - spin_lock(&mctz->lock); - - /* - * If we failed to reclaim anything from this memory cgroup - * it is time to move on to the next cgroup - */ - next_mz = NULL; - if (!reclaimed) { - do { - /* - * Loop until we find yet another one. - * - * By the time we get the soft_limit lock - * again, someone might have aded the - * group back on the RB tree. Iterate to - * make sure we get a different mem. - * mem_cgroup_largest_soft_limit_node returns - * NULL if no other cgroup is present on - * the tree - */ - next_mz = - __mem_cgroup_largest_soft_limit_node(mctz); - if (next_mz == mz) - css_put(&next_mz->memcg->css); - else /* next_mz == NULL or other memcg */ - break; - } while (1); - } - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); - excess = res_counter_soft_limit_excess(&mz->memcg->res); - /* - * One school of thought says that we should not add - * back the node to the tree if reclaim returns 0. - * But our reclaim could return 0, simply because due - * to priority we are exposing a smaller subset of - * memory to reclaim from. Consider this as a longer - * term TODO. - */ - /* If excess == 0, no tree ops */ - __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess); - spin_unlock(&mctz->lock); - css_put(&mz->memcg->css); - loop++; - /* - * Could not reclaim anything and there are no more - * mem cgroups to try or we seem to be looping without - * reclaiming anything. - */ - if (!nr_reclaimed && - (next_mz == NULL || - loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) - break; - } while (!nr_reclaimed); - if (next_mz) - css_put(&next_mz->memcg->css); - return nr_reclaimed; -} - /** * mem_cgroup_force_empty_list - clears LRU of a group * @memcg: group to clear diff --git a/mm/vmscan.c b/mm/vmscan.c index fe715daeb8bc..cf4643807ec2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -139,11 +139,21 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } + +static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) +{ + return !mem_cgroup_disabled() && global_reclaim(sc); +} #else static bool global_reclaim(struct scan_control *sc) { return true; } + +static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) +{ + return false; +} #endif unsigned long zone_reclaimable_pages(struct zone *zone) @@ -2130,7 +2140,8 @@ static inline bool should_continue_reclaim(struct zone *zone, } } -static void shrink_zone(struct zone *zone, struct scan_control *sc) +static void +__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) { unsigned long nr_reclaimed, nr_scanned; @@ -2149,6 +2160,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) do { struct lruvec *lruvec; + if (soft_reclaim && + !mem_cgroup_soft_reclaim_eligible(memcg)) { + memcg = mem_cgroup_iter(root, memcg, &reclaim); + continue; + } + lruvec = mem_cgroup_zone_lruvec(zone, memcg); shrink_lruvec(lruvec, sc); @@ -2179,6 +2196,24 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) sc->nr_scanned - nr_scanned, sc)); } + +static void shrink_zone(struct zone *zone, struct scan_control *sc) +{ + bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc); + unsigned long nr_scanned = sc->nr_scanned; + + __shrink_zone(zone, sc, do_soft_reclaim); + + /* + * No group is over the soft limit or those that are do not have + * pages in the zone we are reclaiming so we have to reclaim everybody + */ + if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) { + __shrink_zone(zone, sc, false); + return; + } +} + /* Returns true if compaction should go ahead for a high-order request */ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) { @@ -2240,8 +2275,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; - unsigned long nr_soft_reclaimed; - unsigned long nr_soft_scanned; bool aborted_reclaim = false; /* @@ -2281,18 +2314,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) continue; } } - /* - * This steals pages from memory cgroups over softlimit - * and returns the number of reclaimed pages and - * scanned pages. This works for global memory pressure - * and balancing, not for a memcg's limit. - */ - nr_soft_scanned = 0; - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - sc->order, sc->gfp_mask, - &nr_soft_scanned); - sc->nr_reclaimed += nr_soft_reclaimed; - sc->nr_scanned += nr_soft_scanned; /* need some check for avoid more shrink_zone() */ } @@ -2880,8 +2901,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ - unsigned long nr_soft_reclaimed; - unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .priority = DEF_PRIORITY, @@ -2996,15 +3015,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, sc.nr_scanned = 0; - nr_soft_scanned = 0; - /* - * Call soft limit reclaim before calling shrink_zone. - */ - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - order, sc.gfp_mask, - &nr_soft_scanned); - sc.nr_reclaimed += nr_soft_reclaimed; - /* * There should be no need to raise the scanning * priority if enough pages are already being scanned |