summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2013-02-22 16:34:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-23 17:50:17 -0800
commitec8acf20afb8534ed511f6613dd2226b9e301010 (patch)
treea0d6779eeffa0f523a2799dbb619e0a34fd786d4 /include
parent33806f06da654092182410d974b6d3c5396ea3eb (diff)
downloadlinux-ec8acf20afb8534ed511f6613dd2226b9e301010.tar.bz2
swap: add per-partition lock for swapfile
swap_lock is heavily contended when I test swapping to 3 fast SSDs (it is even slightly slower than swapping to 2 such SSDs). The main contention comes from swap_info_get(). This patch tries to fix the gap by adding a new per-partition lock. Global data like nr_swapfiles, total_swap_pages, least_priority and swap_list are still protected by swap_lock. nr_swap_pages is now an atomic, so it can be changed without holding swap_lock. In theory, it is possible that get_swap_page() finds no swap pages even though there actually are free swap pages, but that does not sound like a big problem. Accessing partition-specific data (like scan_swap_map() and so on) is only protected by swap_info_struct.lock. Changing swap_info_struct.flags requires holding both swap_lock and swap_info_struct.lock, because scan_swap_map() will check it. Reading the flags is OK with either of the locks held. If both swap_lock and swap_info_struct.lock must be held, we always take the former first to avoid deadlock. swap_entry_free() can change swap_list. To delete that code, we add a new highest_priority_index. Whenever get_swap_page() is called, we check it. If it's valid, we use it. It's a pity that get_swap_page() still holds swap_lock. But in practice, swap_lock isn't heavily contended in my test with this patch (or I can say there are other, much heavier bottlenecks like TLB flush). And BTW, it looks like get_swap_page() doesn't really need the lock. We never free swap_info[] and we check the SWAP_WRITEOK flag. The only risk without the lock is that we could swap out to some low-priority swap, but we can quickly recover after several rounds of swap, so it does not sound like a big deal to me. But I'd prefer to fix this if it's a real problem. "swap: make each swap partition have one address_space" improved the swapout speed from 1.7G/s to 2G/s. This patch further improves the speed to 2.3G/s, so around 15% improvement. It's a multi-process test, so TLB flush isn't the biggest bottleneck before the patches. 
[arnd@arndb.de: fix it for nommu] [hughd@google.com: add missing unlock] [minchan@kernel.org: get rid of lockdep whinge on sys_swapon] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/swap.h32
1 files changed, 27 insertions, 5 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 235c039892ee..a3e22d357e91 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -202,6 +202,18 @@ struct swap_info_struct {
unsigned long *frontswap_map; /* frontswap in-use, one bit per page */
atomic_t frontswap_pages; /* frontswap pages in-use counter */
#endif
+ spinlock_t lock; /*
+ * protect map scan related fields like
+ * swap_map, lowest_bit, highest_bit,
+ * inuse_pages, cluster_next,
+ * cluster_nr, lowest_alloc and
+ * highest_alloc. other fields are only
+ * changed at swapon/swapoff, so are
+ * protected by swap_lock. changing
+ * flags need hold this lock and
+ * swap_lock. If both locks need hold,
+ * hold swap_lock first.
+ */
};
struct swap_list_t {
@@ -209,9 +221,6 @@ struct swap_list_t {
int next; /* swapfile to be used next */
};
-/* Swap 50% full? Release swapcache more aggressively.. */
-#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
-
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
extern unsigned long totalreserve_pages;
@@ -347,8 +356,20 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr);
/* linux/mm/swapfile.c */
-extern long nr_swap_pages;
+extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
+
+/* Swap 50% full? Release swapcache more aggressively.. */
+static inline bool vm_swap_full(void)
+{
+ return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
+}
+
+static inline long get_nr_swap_pages(void)
+{
+ return atomic_long_read(&nr_swap_pages);
+}
+
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
extern swp_entry_t get_swap_page_of_type(int);
@@ -381,9 +402,10 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
#else /* CONFIG_SWAP */
-#define nr_swap_pages 0L
+#define get_nr_swap_pages() 0L
#define total_swap_pages 0L
#define total_swapcache_pages() 0UL
+#define vm_swap_full() 0
#define si_swapinfo(val) \
do { (val)->freeswap = (val)->totalswap = 0; } while (0)