summaryrefslogtreecommitdiffstats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c105
1 files changed, 99 insertions, 6 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 740c4f52059c..06d3479513aa 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -42,6 +42,7 @@
#include <linux/sched.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
+#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/backing-dev.h>
@@ -53,6 +54,7 @@
#include <linux/hugetlb.h>
#include <linux/memory_hotplug.h>
#include <linux/mm_inline.h>
+#include <linux/kfifo.h>
#include "internal.h"
int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1178,6 +1180,97 @@ void memory_failure(unsigned long pfn, int trapno)
__memory_failure(pfn, trapno, 0);
}
+#define MEMORY_FAILURE_FIFO_ORDER 4
+#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
+
+struct memory_failure_entry {
+ unsigned long pfn;
+ int trapno;
+ int flags;
+};
+
+struct memory_failure_cpu {
+ DECLARE_KFIFO(fifo, struct memory_failure_entry,
+ MEMORY_FAILURE_FIFO_SIZE);
+ spinlock_t lock;
+ struct work_struct work;
+};
+
+static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
+
+/**
+ * memory_failure_queue - Schedule handling memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ * @flags: Flags for memory failure handling
+ *
+ * This function is called by the low level hardware error handler
+ * when it detects hardware memory corruption of a page. It schedules
+ * the recovering of error page, including dropping pages, killing
+ * processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Can run in IRQ context.
+ */
+void memory_failure_queue(unsigned long pfn, int trapno, int flags)
+{
+ struct memory_failure_cpu *mf_cpu;
+ unsigned long proc_flags;
+ struct memory_failure_entry entry = {
+ .pfn = pfn,
+ .trapno = trapno,
+ .flags = flags,
+ };
+
+ mf_cpu = &get_cpu_var(memory_failure_cpu);
+ spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ if (kfifo_put(&mf_cpu->fifo, &entry))
+ schedule_work_on(smp_processor_id(), &mf_cpu->work);
+ else
+ pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n",
+ pfn);
+ spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ put_cpu_var(memory_failure_cpu);
+}
+EXPORT_SYMBOL_GPL(memory_failure_queue);
+
+static void memory_failure_work_func(struct work_struct *work)
+{
+ struct memory_failure_cpu *mf_cpu;
+ struct memory_failure_entry entry = { 0, };
+ unsigned long proc_flags;
+ int gotten;
+
+ mf_cpu = &__get_cpu_var(memory_failure_cpu);
+ for (;;) {
+ spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ gotten = kfifo_get(&mf_cpu->fifo, &entry);
+ spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ if (!gotten)
+ break;
+ __memory_failure(entry.pfn, entry.trapno, entry.flags);
+ }
+}
+
+static int __init memory_failure_init(void)
+{
+ struct memory_failure_cpu *mf_cpu;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ mf_cpu = &per_cpu(memory_failure_cpu, cpu);
+ spin_lock_init(&mf_cpu->lock);
+ INIT_KFIFO(mf_cpu->fifo);
+ INIT_WORK(&mf_cpu->work, memory_failure_work_func);
+ }
+
+ return 0;
+}
+core_initcall(memory_failure_init);
+
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page
@@ -1218,7 +1311,7 @@ int unpoison_memory(unsigned long pfn)
* to the end.
*/
if (PageHuge(page)) {
- pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
+ pr_info("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
return 0;
}
if (TestClearPageHWPoison(p))
@@ -1327,7 +1420,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
if (PageHWPoison(hpage)) {
put_page(hpage);
- pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
+ pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
return -EBUSY;
}
@@ -1341,8 +1434,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
list_for_each_entry_safe(page1, page2, &pagelist, lru)
put_page(page1);
- pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
- pfn, ret, page->flags);
+ pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
+ pfn, ret, page->flags);
if (ret > 0)
ret = -EIO;
return ret;
@@ -1413,7 +1506,7 @@ int soft_offline_page(struct page *page, int flags)
}
if (!PageLRU(page)) {
pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
- pfn, page->flags);
+ pfn, page->flags);
return -EIO;
}
@@ -1474,7 +1567,7 @@ int soft_offline_page(struct page *page, int flags)
}
} else {
pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
- pfn, ret, page_count(page), page->flags);
+ pfn, ret, page_count(page), page->flags);
}
if (ret)
return ret;