From 3e38e0aaca9eafb12b1c4b731d1c10975cbe7974 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:25 -0700 Subject: mm: memcg: charge memcg percpu memory to the parent cgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory cgroups are using large chunks of percpu memory to store vmstat data. Yet this memory is not accounted at all, so in the case when there are many (dying) cgroups, it's not exactly clear where all the memory is. Because the size of memory cgroup internal structures can dramatically exceed the size of object or page which is pinning it in the memory, it's not a good idea to simply ignore it. It actually breaks the isolation between cgroups. Let's account the consumed percpu memory to the parent cgroup. [guro@fb.com: add WARN_ON_ONCE()s, per Johannes] Link: http://lkml.kernel.org/r/20200811170611.GB1507044@carbon.DHCP.thefacebook.com Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Dennis Zhou Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Tejun Heo Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long Cc: Bixuan Cui Cc: Michal Koutný Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200623184515.4132564-5-guro@fb.com Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36d5300f9b69..f1fd265b9f9e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5131,13 +5131,18 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat); + /* We charge the parent cgroup, never the current task */ + WARN_ON_ONCE(!current->active_memcg); + + pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_local) { kfree(pn); return 1; } - pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); + pn->lruvec_stat_cpu = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_cpu) { free_percpu(pn->lruvec_stat_local); kfree(pn); @@ -5211,11 +5216,16 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; } - memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); + /* We charge the parent cgroup, never the current task */ + WARN_ON_ONCE(!current->active_memcg); + + memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_local) goto fail; - memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu); + memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_percpu) goto fail; @@ -5264,7 +5274,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) struct mem_cgroup *memcg; long error = -ENOMEM; + memalloc_use_memcg(parent); memcg = mem_cgroup_alloc(); + memalloc_unuse_memcg(); if (IS_ERR(memcg)) return ERR_CAST(memcg); -- cgit v1.2.3