diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 183 |
1 files changed, 105 insertions, 78 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2c13aa7a0164..947ed5413279 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -206,7 +206,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, #ifdef __ARCH_SI_TRAPNO si.si_trapno = trapno; #endif - si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; + si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; if ((flags & MF_ACTION_REQUIRED) && t == current) { si.si_code = BUS_MCEERR_AR; @@ -248,10 +248,12 @@ void shake_page(struct page *p, int access) */ if (access) { int nr; + int nid = page_to_nid(p); do { struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, }; + node_set(nid, shrink.nodes_to_scan); nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) @@ -983,7 +985,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, static void set_page_hwpoison_huge_page(struct page *hpage) { int i; - int nr_pages = 1 << compound_trans_order(hpage); + int nr_pages = 1 << compound_order(hpage); for (i = 0; i < nr_pages; i++) SetPageHWPoison(hpage + i); } @@ -991,7 +993,7 @@ static void set_page_hwpoison_huge_page(struct page *hpage) static void clear_page_hwpoison_huge_page(struct page *hpage) { int i; - int nr_pages = 1 << compound_trans_order(hpage); + int nr_pages = 1 << compound_order(hpage); for (i = 0; i < nr_pages; i++) ClearPageHWPoison(hpage + i); } @@ -1204,6 +1206,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags) for (ps = error_states;; ps++) if ((p->flags & ps->mask) == ps->res) break; + + page_flags |= (p->flags & (1UL << PG_dirty)); + if (!ps->mask) for (ps = error_states;; ps++) if ((page_flags & ps->mask) == ps->res) @@ -1265,7 +1270,7 @@ void memory_failure_queue(unsigned long pfn, int trapno, int flags) if (kfifo_put(&mf_cpu->fifo, &entry)) schedule_work_on(smp_processor_id(), &mf_cpu->work); else - pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n", + pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n", pfn); spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); put_cpu_var(memory_failure_cpu); @@ -1286,7 +1291,10 @@ static void memory_failure_work_func(struct work_struct *work) spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; - memory_failure(entry.pfn, entry.trapno, entry.flags); + if (entry.flags & MF_SOFT_OFFLINE) + soft_offline_page(pfn_to_page(entry.pfn), entry.flags); + else + memory_failure(entry.pfn, entry.trapno, entry.flags); } } @@ -1336,7 +1344,17 @@ int unpoison_memory(unsigned long pfn) return 0; } - nr_pages = 1 << compound_trans_order(page); + /* + * unpoison_memory() can encounter thp only when the thp is being + * worked by memory_failure() and the page lock is not held yet. + * In such case, we yield to memory_failure() and make unpoison fail. + */ + if (PageTransHuge(page)) { + pr_info("MCE: Memory failure is now running on %#lx\n", pfn); + return 0; + } + + nr_pages = 1 << compound_order(page); if (!get_page_unless_zero(page)) { /* @@ -1350,7 +1368,7 @@ int unpoison_memory(unsigned long pfn) return 0; } if (TestClearPageHWPoison(p)) - atomic_long_sub(nr_pages, &num_poisoned_pages); + atomic_long_dec(&num_poisoned_pages); pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); return 0; } @@ -1372,7 +1390,7 @@ int unpoison_memory(unsigned long pfn) unlock_page(page); put_page(page); - if (freeit) + if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) put_page(page); return 0; @@ -1413,7 +1431,8 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) * was free. This flag should be kept set until the source page * is freed and PG_hwpoison on it is set. */ - set_migratetype_isolate(p, true); + if (get_pageblock_migratetype(p) != MIGRATE_ISOLATE) + set_migratetype_isolate(p, true); /* * When the target page is a free hugepage, just remove it * from free hugepage list. @@ -1467,6 +1486,7 @@ static int soft_offline_huge_page(struct page *page, int flags) int ret; unsigned long pfn = page_to_pfn(page); struct page *hpage = compound_head(page); + LIST_HEAD(pagelist); /* * This double-check of PageHWPoison is to avoid the race with @@ -1482,86 +1502,29 @@ static int soft_offline_huge_page(struct page *page, int flags) unlock_page(hpage); /* Keep page count to indicate a given hugepage is isolated. */ - ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, - MIGRATE_SYNC); - put_page(hpage); + list_move(&hpage->lru, &pagelist); + ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, + MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); + /* + * We know that soft_offline_huge_page() tries to migrate + * only one hugepage pointed to by hpage, so we need not + * run through the pagelist here. + */ + putback_active_hugepage(hpage); + if (ret > 0) + ret = -EIO; } else { set_page_hwpoison_huge_page(hpage); dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_trans_order(hpage), + atomic_long_add(1 << compound_order(hpage), &num_poisoned_pages); } return ret; } -static int __soft_offline_page(struct page *page, int flags); - -/** - * soft_offline_page - Soft offline a page. - * @page: page to offline - * @flags: flags. Same as memory_failure(). - * - * Returns 0 on success, otherwise negated errno. - * - * Soft offline a page, by migration or invalidation, - * without killing anything. This is for the case when - * a page is not corrupted yet (so it's still valid to access), - * but has had a number of corrected errors and is better taken - * out. - * - * The actual policy on when to do that is maintained by - * user space. - * - * This should never impact any application or cause data loss, - * however it might take some time. - * - * This is not a 100% solution for all memory, but tries to be - * ``good enough'' for the majority of memory. - */ -int soft_offline_page(struct page *page, int flags) -{ - int ret; - unsigned long pfn = page_to_pfn(page); - struct page *hpage = compound_trans_head(page); - - if (PageHWPoison(page)) { - pr_info("soft offline: %#lx page already poisoned\n", pfn); - return -EBUSY; - } - if (!PageHuge(page) && PageTransHuge(hpage)) { - if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { - pr_info("soft offline: %#lx: failed to split THP\n", - pfn); - return -EBUSY; - } - } - - ret = get_any_page(page, pfn, flags); - if (ret < 0) - return ret; - if (ret) { /* for in-use pages */ - if (PageHuge(page)) - ret = soft_offline_huge_page(page, flags); - else - ret = __soft_offline_page(page, flags); - } else { /* for free pages */ - if (PageHuge(page)) { - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_trans_order(hpage), - &num_poisoned_pages); - } else { - SetPageHWPoison(page); - atomic_long_inc(&num_poisoned_pages); - } - } - unset_migratetype_isolate(page, MIGRATE_MOVABLE); - return ret; -} - static int __soft_offline_page(struct page *page, int flags) { int ret; @@ -1648,3 +1611,67 @@ static int __soft_offline_page(struct page *page, int flags) } return ret; } + +/** + * soft_offline_page - Soft offline a page. + * @page: page to offline + * @flags: flags. Same as memory_failure(). + * + * Returns 0 on success, otherwise negated errno. + * + * Soft offline a page, by migration or invalidation, + * without killing anything. This is for the case when + * a page is not corrupted yet (so it's still valid to access), + * but has had a number of corrected errors and is better taken + * out. + * + * The actual policy on when to do that is maintained by + * user space. + * + * This should never impact any application or cause data loss, + * however it might take some time. + * + * This is not a 100% solution for all memory, but tries to be + * ``good enough'' for the majority of memory. + */ +int soft_offline_page(struct page *page, int flags) +{ + int ret; + unsigned long pfn = page_to_pfn(page); + struct page *hpage = compound_trans_head(page); + + if (PageHWPoison(page)) { + pr_info("soft offline: %#lx page already poisoned\n", pfn); + return -EBUSY; + } + if (!PageHuge(page) && PageTransHuge(hpage)) { + if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { + pr_info("soft offline: %#lx: failed to split THP\n", + pfn); + return -EBUSY; + } + } + + ret = get_any_page(page, pfn, flags); + if (ret < 0) + goto unset; + if (ret) { /* for in-use pages */ + if (PageHuge(page)) + ret = soft_offline_huge_page(page, flags); + else + ret = __soft_offline_page(page, flags); + } else { /* for free pages */ + if (PageHuge(page)) { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); + atomic_long_add(1 << compound_order(hpage), + &num_poisoned_pages); + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + } + } +unset: + unset_migratetype_isolate(page, MIGRATE_MOVABLE); + return ret; +} |