author    Johannes Weiner <hannes@cmpxchg.org>    2017-05-03 14:55:03 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-05-03 15:52:11 -0700
commit    2a2e48854d704214dac7546e87ae0e4daa0e61a0 (patch)
tree      a3b999e7be0b2495c9602348eda500a9908ae822 /include
parent    20ac28933c49433e0f064314de3618129b54a22e (diff)
download  linux-2a2e48854d704214dac7546e87ae0e4daa0e61a0.tar.bz2
mm: vmscan: fix IO/refault regression in cache workingset transition
Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") we noticed bigger IO spikes during changes in cache access patterns. The patch in question shrunk the inactive list size to leave more room for the current workingset in the presence of streaming IO. However, workingset transitions that previously happened on the inactive list are now pushed out of memory and incur more refaults to complete.

This patch disables active list protection when refaults are being observed. This accelerates workingset transitions, and allows more of the new set to establish itself from memory, without eating into the ability to protect the established workingset during stable periods.

The workloads that were measurably affected for us were hit pretty bad by it, with refault/majfault rates doubling and tripling during cache transitions, and the machines sustaining half-hour periods of 100% IO utilization, where they'd previously have sub-minute peaks at 60-90%.

Stateful services that handle user data tend to be more conservative with kernel upgrades. As a result we hit most page cache issues with some delay, as was the case here. The severity seemed to warrant a stable tag.

Fixes: 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list")
Link: http://lkml.kernel.org/r/20170404220052.27593-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org> [4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
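Note that the diff below is limited to 'include', so it only shows the new memcg refault counters and the lruvec->refaults snapshot field; the behavioral change itself lives in mm/vmscan.c and is not part of this view. As a rough, hypothetical sketch of the mechanism described above (the function names and the simplified balance check are illustrative, not the code from this commit): reclaim snapshots the refault counter at the end of each cycle, and as long as new refaults have appeared since that snapshot, the inactive file list is reported as too small, so active pages get deactivated instead of protected.

	#include <linux/memcontrol.h>
	#include <linux/mmzone.h>

	/*
	 * Hypothetical sketch only -- not the code from this commit.
	 * It uses only the pieces added in the header diff below:
	 * mem_cgroup_read_stat(), MEMCG_WORKINGSET_REFAULT and the new
	 * lruvec->refaults field.
	 */
	static bool workingset_transitioning(struct lruvec *lruvec,
					     struct mem_cgroup *memcg)
	{
		unsigned long refaults;

		/* Current refault count for this memcg. */
		refaults = mem_cgroup_read_stat(memcg, MEMCG_WORKINGSET_REFAULT);

		/*
		 * lruvec->refaults holds the value snapshotted at the end
		 * of the previous reclaim cycle; any difference means the
		 * cache workingset is changing right now.
		 */
		return refaults != lruvec->refaults;
	}

	static bool inactive_file_list_is_low(struct lruvec *lruvec,
					      struct mem_cgroup *memcg,
					      unsigned long inactive,
					      unsigned long active)
	{
		/*
		 * During a workingset transition, report the inactive list
		 * as low regardless of its size: active pages are then
		 * deactivated, and the new workingset can establish itself
		 * from memory instead of refaulting from disk.
		 */
		if (workingset_transitioning(lruvec, memcg))
			return true;

		/* Otherwise keep the usual inactive/active balance check. */
		return inactive < active;
	}

At the end of a reclaim cycle the snapshot would be refreshed, e.g. lruvec->refaults = mem_cgroup_read_stat(memcg, MEMCG_WORKINGSET_REFAULT);, so that stable periods retain full active-list protection.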
Diffstat (limited to 'include')
-rw-r--r--    include/linux/memcontrol.h    64
-rw-r--r--    include/linux/mmzone.h    2
2 files changed, 63 insertions, 3 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c5ebb32fef49..cfa91a3ca0ca 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -57,6 +57,9 @@ enum mem_cgroup_stat_index {
MEMCG_SLAB_RECLAIMABLE,
MEMCG_SLAB_UNRECLAIMABLE,
MEMCG_SOCK,
+ MEMCG_WORKINGSET_REFAULT,
+ MEMCG_WORKINGSET_ACTIVATE,
+ MEMCG_WORKINGSET_NODERECLAIM,
MEMCG_NR_STAT,
};
@@ -495,6 +498,40 @@ extern int do_swap_account;
void lock_page_memcg(struct page *page);
void unlock_page_memcg(struct page *page);
+static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx)
+{
+ long val = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ val += per_cpu(memcg->stat->count[idx], cpu);
+
+ if (val < 0)
+ val = 0;
+
+ return val;
+}
+
+static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx, int val)
+{
+ if (!mem_cgroup_disabled())
+ this_cpu_add(memcg->stat->count[idx], val);
+}
+
+static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx)
+{
+ mem_cgroup_update_stat(memcg, idx, 1);
+}
+
+static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx)
+{
+ mem_cgroup_update_stat(memcg, idx, -1);
+}
+
/**
* mem_cgroup_update_page_stat - update page state statistics
* @page: the page
@@ -509,14 +546,14 @@ void unlock_page_memcg(struct page *page);
* if (TestClearPageState(page))
* mem_cgroup_update_page_stat(page, state, -1);
* unlock_page(page) or unlock_page_memcg(page)
+ *
+ * Kernel pages are an exception to this, since they'll never move.
*/
static inline void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_stat_index idx, int val)
{
- VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
-
if (page->mem_cgroup)
- this_cpu_add(page->mem_cgroup->stat->count[idx], val);
+ mem_cgroup_update_stat(page->mem_cgroup, idx, val);
}
static inline void mem_cgroup_inc_page_stat(struct page *page,
@@ -741,6 +778,27 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
return false;
}
+static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx)
+{
+ return 0;
+}
+
+static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx, int val)
+{
+}
+
+static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx)
+{
+}
+
+static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx)
+{
+}
+
static inline void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_stat_index idx,
int nr)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 446cf68c1c09..e0c3c5e3d8a0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -225,6 +225,8 @@ struct lruvec {
struct zone_reclaim_stat reclaim_stat;
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
+ /* Refaults at the time of last reclaim cycle */
+ unsigned long refaults;
#ifdef CONFIG_MEMCG
struct pglist_data *pgdat;
#endif