summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2013-07-03 15:02:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-03 16:07:31 -0700
commitc6286c983900c77410a951874f1589f4a41fbbae (patch)
tree3e4b1d2c2c4c783886f9c6e98770f4602273197c
parentf968ef1c55199301e98c28fe7dfa8ace05ecdb96 (diff)
downloadlinux-c6286c983900c77410a951874f1589f4a41fbbae.tar.bz2
mm: add tracepoints for LRU activation and insertions
Andrew Perepechko reported a problem whereby pages are being prematurely evicted as the mark_page_accessed() hint is ignored for pages that are currently on a pagevec -- http://www.spinics.net/lists/linux-ext4/msg37340.html . Alexey Lyahkov and Robin Dong have also reported problems recently that could be due to hot pages reaching the end of the inactive list too quickly and be reclaimed. Rather than addressing this on a per-filesystem basis, this series aims to fix the mark_page_accessed() interface by deferring what LRU a page is added to pagevec drain time and allowing mark_page_accessed() to call SetPageActive on a pagevec page. Patch 1 adds two tracepoints for LRU page activation and insertion. Using these processes it's possible to build a model of pages in the LRU that can be processed offline. Patch 2 defers making the decision on what LRU to add a page to until when the pagevec is drained. Patch 3 searches the local pagevec for pages to mark PageActive on mark_page_accessed. The changelog explains why only the local pagevec is examined. Patches 4 and 5 tidy up the API. postmark, a dd-based test and fs-mark both single and threaded mode were run but none of them showed any performance degradation or gain as a result of the patch. Using patch 1, I built a *very* basic model of the LRU to examine offline what the average age of different page types on the LRU were in milliseconds. Of course, capturing the trace distorts the test as it's written to local disk but it does not matter for the purposes of this test. The average age of pages in milliseconds were vanilla deferdrain Average age mapped anon: 1454 1250 Average age mapped file: 127841 155552 Average age unmapped anon: 85 235 Average age unmapped file: 73633 38884 Average age unmapped buffers: 74054 116155 The LRU activity was mostly files which you'd expect for a dd-based workload. Note that the average age of buffer pages is increased by the series and it is expected this is due to the fact that the buffer pages are now getting added to the active list when drained from the pagevecs. Note that the average age of the unmapped file data is decreased as they are still added to the inactive list and are reclaimed before the buffers. There is no guarantee this is a universal win for all workloads and it would be nice if the filesystem people gave some thought as to whether this decision is generally a win or a loss. This patch: Using these tracepoints it is possible to model LRU activity and the average residency of pages of different types. This can be used to debug problems related to premature reclaim of pages of particular types. Signed-off-by: Mel Gorman <mgorman@suse.de> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Alexey Lyahkov <alexey.lyashkov@gmail.com> Cc: Andrew Perepechko <anserper@ya.ru> Cc: Robin Dong <sanbai@taobao.com> Cc: Theodore Tso <tytso@mit.edu> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Bernd Schubert <bernd.schubert@fastmail.fm> Cc: David Howells <dhowells@redhat.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/trace/events/pagemap.h89
-rw-r--r--mm/swap.c5
2 files changed, 94 insertions, 0 deletions
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
new file mode 100644
index 000000000000..1c9fabde69e4
--- /dev/null
+++ b/include/trace/events/pagemap.h
@@ -0,0 +1,89 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pagemap
+
+#if !defined(_TRACE_PAGEMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGEMAP_H
+
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+
+#define PAGEMAP_MAPPED 0x0001u
+#define PAGEMAP_ANONYMOUS 0x0002u
+#define PAGEMAP_FILE 0x0004u
+#define PAGEMAP_SWAPCACHE 0x0008u
+#define PAGEMAP_SWAPBACKED 0x0010u
+#define PAGEMAP_MAPPEDDISK 0x0020u
+#define PAGEMAP_BUFFERS 0x0040u
+
+#define trace_pagemap_flags(page) ( \
+ (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
+ (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \
+ (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \
+ (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \
+ (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \
+ (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \
+ )
+
+TRACE_EVENT(mm_lru_insertion,
+
+ TP_PROTO(
+ struct page *page,
+ unsigned long pfn,
+ int lru,
+ unsigned long flags
+ ),
+
+ TP_ARGS(page, pfn, lru, flags),
+
+ TP_STRUCT__entry(
+ __field(struct page *, page )
+ __field(unsigned long, pfn )
+ __field(int, lru )
+ __field(unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->page = page;
+ __entry->pfn = pfn;
+ __entry->lru = lru;
+ __entry->flags = flags;
+ ),
+
+ /* Flag format is based on page-types.c formatting for pagemap */
+ TP_printk("page=%p pfn=%lu lru=%d flags=%s%s%s%s%s%s",
+ __entry->page,
+ __entry->pfn,
+ __entry->lru,
+ __entry->flags & PAGEMAP_MAPPED ? "M" : " ",
+ __entry->flags & PAGEMAP_ANONYMOUS ? "a" : "f",
+ __entry->flags & PAGEMAP_SWAPCACHE ? "s" : " ",
+ __entry->flags & PAGEMAP_SWAPBACKED ? "b" : " ",
+ __entry->flags & PAGEMAP_MAPPEDDISK ? "d" : " ",
+ __entry->flags & PAGEMAP_BUFFERS ? "B" : " ")
+);
+
+TRACE_EVENT(mm_lru_activate,
+
+ TP_PROTO(struct page *page, unsigned long pfn),
+
+ TP_ARGS(page, pfn),
+
+ TP_STRUCT__entry(
+ __field(struct page *, page )
+ __field(unsigned long, pfn )
+ ),
+
+ TP_fast_assign(
+ __entry->page = page;
+ __entry->pfn = pfn;
+ ),
+
+ /* Flag format is based on page-types.c formatting for pagemap */
+ TP_printk("page=%p pfn=%lu", __entry->page, __entry->pfn)
+
+);
+
+#endif /* _TRACE_PAGEMAP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/swap.c b/mm/swap.c
index dfd7d71d6841..53c9ceb7b816 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,6 +34,9 @@
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/pagemap.h>
+
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
@@ -384,6 +387,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
SetPageActive(page);
lru += LRU_ACTIVE;
add_page_to_lru_list(page, lruvec, lru);
+ trace_mm_lru_activate(page, page_to_pfn(page));
__count_vm_event(PGACTIVATE);
update_page_reclaim_stat(lruvec, file, 1);
@@ -808,6 +812,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
SetPageActive(page);
add_page_to_lru_list(page, lruvec, lru);
update_page_reclaim_stat(lruvec, file, active);
+ trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
}
/*