summaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c210
1 files changed, 23 insertions, 187 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f4ec4e7ca4cd..3508777837c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
static void mem_cgroup_threshold(struct mem_cgroup *mem);
static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
-enum {
- SCAN_BY_LIMIT,
- SCAN_BY_SYSTEM,
- NR_SCAN_CONTEXT,
- SCAN_BY_SHRINK, /* not recorded now */
-};
-
-enum {
- SCAN,
- SCAN_ANON,
- SCAN_FILE,
- ROTATE,
- ROTATE_ANON,
- ROTATE_FILE,
- FREED,
- FREED_ANON,
- FREED_FILE,
- ELAPSED,
- NR_SCANSTATS,
-};
-
-struct scanstat {
- spinlock_t lock;
- unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
- unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-};
-
-const char *scanstat_string[NR_SCANSTATS] = {
- "scanned_pages",
- "scanned_anon_pages",
- "scanned_file_pages",
- "rotated_pages",
- "rotated_anon_pages",
- "rotated_file_pages",
- "freed_pages",
- "freed_anon_pages",
- "freed_file_pages",
- "elapsed_ns",
-};
-#define SCANSTAT_WORD_LIMIT "_by_limit"
-#define SCANSTAT_WORD_SYSTEM "_by_system"
-#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
-
-
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
/* For oom notifier event fd */
struct list_head oom_notify;
- /* For recording LRU-scan statistics */
- struct scanstat scanstat;
+
/*
* Should we move charges of a task when a task is moved into this
* mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
}
#endif
-static void __mem_cgroup_record_scanstat(unsigned long *stats,
- struct memcg_scanrecord *rec)
-{
-
- stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
- stats[SCAN_ANON] += rec->nr_scanned[0];
- stats[SCAN_FILE] += rec->nr_scanned[1];
-
- stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
- stats[ROTATE_ANON] += rec->nr_rotated[0];
- stats[ROTATE_FILE] += rec->nr_rotated[1];
-
- stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
- stats[FREED_ANON] += rec->nr_freed[0];
- stats[FREED_FILE] += rec->nr_freed[1];
-
- stats[ELAPSED] += rec->elapsed;
-}
-
-static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
-{
- struct mem_cgroup *mem;
- int context = rec->context;
-
- if (context >= NR_SCAN_CONTEXT)
- return;
-
- mem = rec->mem;
- spin_lock(&mem->scanstat.lock);
- __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
- spin_unlock(&mem->scanstat.lock);
-
- mem = rec->root;
- spin_lock(&mem->scanstat.lock);
- __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
- spin_unlock(&mem->scanstat.lock);
-}
-
/*
* Scan the hierarchy if needed to reclaim memory. We remember the last child
* we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
- struct memcg_scanrecord rec;
unsigned long excess;
- unsigned long scanned;
+ unsigned long nr_scanned;
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
if (!check_soft && !shrink && root_mem->memsw_is_minimum)
noswap = true;
- if (shrink)
- rec.context = SCAN_BY_SHRINK;
- else if (check_soft)
- rec.context = SCAN_BY_SYSTEM;
- else
- rec.context = SCAN_BY_LIMIT;
-
- rec.root = root_mem;
-
while (1) {
victim = mem_cgroup_select_victim(root_mem);
if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
css_put(&victim->css);
continue;
}
- rec.mem = victim;
- rec.nr_scanned[0] = 0;
- rec.nr_scanned[1] = 0;
- rec.nr_rotated[0] = 0;
- rec.nr_rotated[1] = 0;
- rec.nr_freed[0] = 0;
- rec.nr_freed[1] = 0;
- rec.elapsed = 0;
/* we use swappiness of local cgroup */
if (check_soft) {
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
- noswap, zone, &rec, &scanned);
- *total_scanned += scanned;
+ noswap, zone, &nr_scanned);
+ *total_scanned += nr_scanned;
} else
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
- noswap, &rec);
- mem_cgroup_record_scanstat(&rec);
+ noswap);
css_put(&victim->css);
/*
* At shrinking usage, we can't check we should stop here or
@@ -1841,29 +1739,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
*/
static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
{
- int lock_count = -1;
struct mem_cgroup *iter, *failed = NULL;
bool cond = true;
for_each_mem_cgroup_tree_cond(iter, mem, cond) {
- bool locked = iter->oom_lock;
-
- iter->oom_lock = true;
- if (lock_count == -1)
- lock_count = iter->oom_lock;
- else if (lock_count != locked) {
+ if (iter->oom_lock) {
/*
* this subtree of our hierarchy is already locked
* so we cannot give a lock.
*/
- lock_count = 0;
failed = iter;
cond = false;
- }
+ } else
+ iter->oom_lock = true;
}
if (!failed)
- goto done;
+ return true;
/*
* OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1769,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
}
iter->oom_lock = false;
}
-done:
- return lock_count;
+ return false;
}
/*
@@ -2091,6 +1982,7 @@ struct memcg_stock_pcp {
#define FLUSHING_CACHED_CHARGE (0)
};
static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
+static DEFINE_MUTEX(percpu_charge_mutex);
/*
* Try to consume stocked charge on this cpu. If success, one page is consumed
@@ -2168,13 +2060,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
/* Notify other cpus that system-wide "drain" is running */
get_online_cpus();
- /*
- * Get a hint for avoiding draining charges on the current cpu,
- * which must be exhausted by our charging. It is not required that
- * this be a precise check, so we use raw_smp_processor_id() instead of
- * getcpu()/putcpu().
- */
- curcpu = raw_smp_processor_id();
+ curcpu = get_cpu();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *mem;
@@ -2191,14 +2077,14 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
schedule_work_on(cpu, &stock->work);
}
}
+ put_cpu();
if (!sync)
goto out;
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
- if (mem_cgroup_same_or_subtree(root_mem, stock->cached) &&
- test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
+ if (test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
flush_work(&stock->work);
}
out:
@@ -2213,14 +2099,22 @@ out:
*/
static void drain_all_stock_async(struct mem_cgroup *root_mem)
{
+ /*
+ * If someone calls draining, avoid adding more kworker runs.
+ */
+ if (!mutex_trylock(&percpu_charge_mutex))
+ return;
drain_all_stock(root_mem, false);
+ mutex_unlock(&percpu_charge_mutex);
}
/* This is a synchronous drain interface. */
static void drain_all_stock_sync(struct mem_cgroup *root_mem)
{
/* called when force_empty is called */
+ mutex_lock(&percpu_charge_mutex);
drain_all_stock(root_mem, true);
+ mutex_unlock(&percpu_charge_mutex);
}
/*
@@ -3858,18 +3752,14 @@ try_to_free:
/* try to free all pages in this cgroup */
shrink = 1;
while (nr_retries && mem->res.usage > 0) {
- struct memcg_scanrecord rec;
int progress;
if (signal_pending(current)) {
ret = -EINTR;
goto out;
}
- rec.context = SCAN_BY_SHRINK;
- rec.mem = mem;
- rec.root = mem;
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
- false, &rec);
+ false);
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
@@ -4713,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
}
#endif /* CONFIG_NUMA */
-static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct cgroup_map_cb *cb)
-{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
- char string[64];
- int i;
-
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_LIMIT);
- cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
- }
-
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_SYSTEM);
- cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
- }
-
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_LIMIT);
- strcat(string, SCANSTAT_WORD_HIERARCHY);
- cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
- }
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_SYSTEM);
- strcat(string, SCANSTAT_WORD_HIERARCHY);
- cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
- }
- return 0;
-}
-
-static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
- unsigned int event)
-{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-
- spin_lock(&mem->scanstat.lock);
- memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
- memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
- spin_unlock(&mem->scanstat.lock);
- return 0;
-}
-
-
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -4831,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
.mode = S_IRUGO,
},
#endif
- {
- .name = "vmscan_stat",
- .read_map = mem_cgroup_vmscan_stat_read,
- .trigger = mem_cgroup_reset_vmscan_stat,
- },
};
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5099,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
atomic_set(&mem->refcnt, 1);
mem->move_charge_at_immigrate = 0;
mutex_init(&mem->thresholds_lock);
- spin_lock_init(&mem->scanstat.lock);
return &mem->css;
free_out:
__mem_cgroup_free(mem);