summaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c42
1 files changed, 37 insertions, 5 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bb0d3ac80ade..9b7a7d0592e5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,6 +73,7 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
struct mem_cgroup *root_mem_cgroup __read_mostly;
+/* The number of times we should retry reclaim failures before giving up. */
#define MEM_CGROUP_RECLAIM_RETRIES 5
/* Socket memory accounting disabled? */
@@ -2363,18 +2364,23 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
return 0;
}
-static void reclaim_high(struct mem_cgroup *memcg,
- unsigned int nr_pages,
- gfp_t gfp_mask)
+static unsigned long reclaim_high(struct mem_cgroup *memcg,
+ unsigned int nr_pages,
+ gfp_t gfp_mask)
{
+ unsigned long nr_reclaimed = 0;
+
do {
if (page_counter_read(&memcg->memory) <=
READ_ONCE(memcg->memory.high))
continue;
memcg_memory_event(memcg, MEMCG_HIGH);
- try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+ nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
+ gfp_mask, true);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
+
+ return nr_reclaimed;
}
static void high_work_func(struct work_struct *work)
@@ -2530,16 +2536,32 @@ void mem_cgroup_handle_over_high(void)
{
unsigned long penalty_jiffies;
unsigned long pflags;
+ unsigned long nr_reclaimed;
unsigned int nr_pages = current->memcg_nr_pages_over_high;
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *memcg;
+ bool in_retry = false;
if (likely(!nr_pages))
return;
memcg = get_mem_cgroup_from_mm(current->mm);
- reclaim_high(memcg, nr_pages, GFP_KERNEL);
current->memcg_nr_pages_over_high = 0;
+retry_reclaim:
+ /*
+ * The allocating task should reclaim at least the batch size, but for
+ * subsequent retries we only want to do what's necessary to prevent oom
+ * or breaching resource isolation.
+ *
+ * This is distinct from memory.max or page allocator behaviour because
+ * memory.high is currently batched, whereas memory.max and the page
+ * allocator run every time an allocation is made.
+ */
+ nr_reclaimed = reclaim_high(memcg,
+ in_retry ? SWAP_CLUSTER_MAX : nr_pages,
+ GFP_KERNEL);
+
/*
* memory.high is breached and reclaim is unable to keep up. Throttle
* allocators proactively to slow down excessive growth.
@@ -2567,6 +2589,16 @@ void mem_cgroup_handle_over_high(void)
goto out;
/*
+ * If reclaim is making forward progress but we're still over
+ * memory.high, we want to encourage that rather than doing allocator
+ * throttling.
+ */
+ if (nr_reclaimed || nr_retries--) {
+ in_retry = true;
+ goto retry_reclaim;
+ }
+
+ /*
* If we exit early, we're guaranteed to die (since
* schedule_timeout_killable sets TASK_KILLABLE). This means we don't
* need to account for any ill-begotten jiffies to pay them off later.