From 5db8a73a8d7cc6a66afbf25ed7fda338caa8f5f9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Jun 2011 15:08:48 -0700 Subject: mm/memory-failure.c: fix page isolated count mismatch Pages isolated for migration are accounted with the vmstat counters NR_ISOLATE_[ANON|FILE]. Callers of migrate_pages() are expected to increment these counters when pages are isolated from the LRU. Once the pages have been migrated, they are put back on the LRU or freed and the isolated count is decremented. Memory failure is not properly accounting for pages it isolates causing the NR_ISOLATED counters to be negative. On SMP builds, this goes unnoticed as negative counters are treated as 0 due to expected per-cpu drift. On UP builds, the counter is treated by too_many_isolated() as a large value causing processes to enter D state during page reclaim or compaction. This patch accounts for pages isolated by memory failure correctly. [mel@csn.ul.ie: rewrote changelog] Reviewed-by: Andrea Arcangeli Signed-off-by: Minchan Kim Cc: Andi Kleen Acked-by: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5c8f7e08928d..eac0ba561491 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1468,7 +1469,8 @@ int soft_offline_page(struct page *page, int flags) put_page(page); if (!ret) { LIST_HEAD(pagelist); - + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, true); -- cgit v1.2.3 From 9b679320a5fbf46454011e5c62e0b8991b0956d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Jun 2011 16:18:09 -0700 Subject: mm/memory-failure.c: fix spinlock vs mutex order We cannot take a mutex while holding a spinlock, so flip the order and fix the locking documentation. Signed-off-by: Peter Zijlstra Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 21 ++++++--------------- mm/rmap.c | 5 ++--- 2 files changed, 8 insertions(+), 18 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index eac0ba561491..740c4f52059c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -391,10 +391,11 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, struct task_struct *tsk; struct anon_vma *av; - read_lock(&tasklist_lock); av = page_lock_anon_vma(page); if (av == NULL) /* Not actually mapped anymore */ - goto out; + return; + + read_lock(&tasklist_lock); for_each_process (tsk) { struct anon_vma_chain *vmac; @@ -408,9 +409,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - page_unlock_anon_vma(av); -out: read_unlock(&tasklist_lock); + page_unlock_anon_vma(av); } /* @@ -424,17 +424,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, struct prio_tree_iter iter; struct address_space *mapping = page->mapping; - /* - * A note on the locking order between the two locks. - * We don't rely on this particular order. - * If you have some other code that needs a different order - * feel free to switch them around. Or add a reverse link - * from mm_struct to task_struct, then this could be all - * done without taking tasklist_lock and looping over all tasks. - */ - - read_lock(&tasklist_lock); mutex_lock(&mapping->i_mmap_mutex); + read_lock(&tasklist_lock); for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -454,8 +445,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - mutex_unlock(&mapping->i_mmap_mutex); read_unlock(&tasklist_lock); + mutex_unlock(&mapping->i_mmap_mutex); } /* diff --git a/mm/rmap.c b/mm/rmap.c index 27dfd3b82b0f..23295f65ae43 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -38,9 +38,8 @@ * in arch-dependent flush_dcache_mmap_lock, * within inode_wb_list_lock in __sync_single_inode) * - * (code doesn't rely on that order so it could be switched around) - * ->tasklist_lock - * anon_vma->mutex (memory_failure, collect_procs_anon) + * anon_vma->mutex,mapping->i_mutex (memory_failure, collect_procs_anon) + * ->tasklist_lock * pte map lock */ -- cgit v1.2.3