Diffstat (limited to 'mm')
-rw-r--r--  mm/damon/core-test.h  |  4
-rw-r--r--  mm/huge_memory.c      |  2
-rw-r--r--  mm/khugepaged.c       | 26
-rw-r--r--  mm/memory-failure.c   | 28
-rw-r--r--  mm/memory.c           |  9
-rw-r--r--  mm/oom_kill.c         | 23
-rw-r--r--  mm/page_alloc.c       |  8
-rw-r--r--  mm/secretmem.c        |  2
-rw-r--r--  mm/vmalloc.c          | 15
9 files changed, 73 insertions, 44 deletions
diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h
index c938a9c34e6c..7008c3735e99 100644
--- a/mm/damon/core-test.h
+++ b/mm/damon/core-test.h
@@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test)
r = damon_new_region(0, 22);
damon_add_region(r, t);
damon_split_regions_of(c, t, 2);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u);
+ KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
damon_free_target(t);
t = damon_new_target(42);
r = damon_new_region(0, 220);
damon_add_region(r, t);
damon_split_regions_of(c, t, 4);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u);
+ KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
damon_free_target(t);
damon_destroy_ctx(c);
}
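Note: the EQ-to-LE change reflects that damon_split_regions_of() picks sub-region sizes at random (aligned to DAMON's minimum region size), so the requested split count is only an upper bound. The userspace sketch below illustrates that behaviour; MIN_REGION and the split logic are illustrative stand-ins, not DAMON's actual code.

/*
 * Toy illustration (not DAMON code) of why the test now uses
 * KUNIT_EXPECT_LE: cut points are chosen at random and rounded to a
 * minimum granularity, so a small region can yield fewer pieces than
 * requested. MIN_REGION is a hypothetical stand-in constant.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MIN_REGION 10UL        /* hypothetical minimum region granularity */

/* Split [start, end) into at most nr_pieces regions at random cut points. */
static unsigned int random_split(unsigned long start, unsigned long end,
                                 unsigned int nr_pieces)
{
        unsigned int nr = 1;
        unsigned int i;

        for (i = 1; i < nr_pieces; i++) {
                unsigned long sz = end - start;
                /* Random cut point, rounded down to the granularity. */
                unsigned long cut = (unsigned long)rand() % (sz + 1)
                                    / MIN_REGION * MIN_REGION;

                if (cut == 0 || cut >= sz)
                        continue;       /* no valid cut: fewer regions result */
                start += cut;
                nr++;
        }
        return nr;
}

int main(void)
{
        srand((unsigned int)time(NULL));
        /* Mirrors the test inputs: [0, 22) into 2 and [0, 220) into 4. */
        printf("[0, 22) split into at most 2 -> %u region(s)\n",
               random_split(0, 22, 2));
        printf("[0, 220) split into at most 4 -> %u region(s)\n",
               random_split(0, 220, 4));
        return 0;
}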
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 92192cb086c7..c5142d237e48 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
lruvec = lock_page_lruvec(head);
+ ClearPageHasHWPoisoned(head);
+
for (i = nr - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond EOF: drop them from page cache */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 045cc579f724..8a8b3aa92937 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
if (!transhuge_vma_enabled(vma, vm_flags))
return false;
+ if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
+ vma->vm_pgoff, HPAGE_PMD_NR))
+ return false;
+
/* Enabled via shmem mount options or sysfs settings. */
- if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
- return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
- HPAGE_PMD_NR);
- }
+ if (shmem_file(vma->vm_file))
+ return shmem_huge_enabled(vma);
/* THP settings require madvise. */
if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
return false;
- /* Read-only file mappings need to be aligned for THP to work. */
+ /* Only regular file is valid */
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
- !inode_is_open_for_write(vma->vm_file->f_inode) &&
(vm_flags & VM_EXEC)) {
- return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
- HPAGE_PMD_NR);
+ struct inode *inode = vma->vm_file->f_inode;
+
+ return !inode_is_open_for_write(inode) &&
+ S_ISREG(inode->i_mode);
}
if (!vma->anon_vma || vma->vm_ops)
@@ -1763,6 +1766,10 @@ static void collapse_file(struct mm_struct *mm,
filemap_flush(mapping);
result = SCAN_FAIL;
goto xa_unlocked;
+ } else if (PageWriteback(page)) {
+ xas_unlock_irq(&xas);
+ result = SCAN_FAIL;
+ goto xa_unlocked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
@@ -1798,7 +1805,8 @@ static void collapse_file(struct mm_struct *mm,
goto out_unlock;
}
- if (!is_shmem && PageDirty(page)) {
+ if (!is_shmem && (PageDirty(page) ||
+ PageWriteback(page))) {
/*
* khugepaged only works on read-only fd, so this
* page is dirty because it hasn't been flushed
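Note: the hugepage_vma_check() change above rejects any file-backed VMA whose file offset is not PMD-size aligned with its virtual start, and limits CONFIG_READ_ONLY_THP_FOR_FS collapse to regular files. The userspace sketch below shows the same alignment arithmetic; it assumes 4 KiB pages and 2 MiB PMD huge pages (HPAGE_PMD_NR == 512), which are architecture-dependent values.

/*
 * Sketch of the alignment test now applied to all file-backed VMAs: the
 * page offset of vm_start within the file must be a multiple of
 * HPAGE_PMD_NR, otherwise no PMD can map file-aligned content.
 * Assumes 4 KiB pages and 2 MiB PMD huge pages (x86-64 style).
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define HPAGE_PMD_NR    512UL                   /* 2 MiB / 4 KiB */
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

static bool file_vma_pmd_aligned(unsigned long vm_start, unsigned long vm_pgoff)
{
        return IS_ALIGNED((vm_start >> PAGE_SHIFT) - vm_pgoff, HPAGE_PMD_NR);
}

int main(void)
{
        /* File offset 0 mapped at a 2 MiB-aligned address: eligible. */
        printf("%d\n", file_vma_pmd_aligned(0x200000UL, 0));
        /* Same address but a one-page file offset: not eligible. */
        printf("%d\n", file_vma_pmd_aligned(0x200000UL, 1));
        return 0;
}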
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3e6449f2102a..bdbbb32211a5 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page)
if (!HWPoisonHandlable(head))
return -EBUSY;
- if (PageTransHuge(head)) {
- /*
- * Non anonymous thp exists only in allocation/free time. We
- * can't handle such a case correctly, so let's give it up.
- * This should be better than triggering BUG_ON when kernel
- * tries to touch the "partially handled" page.
- */
- if (!PageAnon(head)) {
- pr_err("Memory failure: %#lx: non anonymous thp\n",
- page_to_pfn(page));
- return 0;
- }
- }
-
if (get_page_unless_zero(head)) {
if (head == compound_head(page))
return 1;
@@ -1708,6 +1694,20 @@ try_again:
}
if (PageTransHuge(hpage)) {
+ /*
+ * The flag must be set after the refcount is bumped
+ * otherwise it may race with THP split.
+ * And the flag can't be set in get_hwpoison_page() since
+ * it is called by soft offline too and it is just called
+ * for !MF_COUNT_INCREASE. So here seems to be the best
+ * place.
+ *
+ * Don't need care about the above error handling paths for
+ * get_hwpoison_page() since they handle either free page
+ * or unhandlable page. The refcount is bumped iff the
+ * page is a valid handlable page.
+ */
+ SetPageHasHWPoisoned(hpage);
if (try_to_split_thp_page(p, "Memory Failure") < 0) {
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
res = -EBUSY;
diff --git a/mm/memory.c b/mm/memory.c
index adf9b9ef8277..c52be6d6b605 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3907,6 +3907,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
return ret;
/*
+ * Just backoff if any subpage of a THP is corrupted otherwise
+ * the corrupted page may mapped by PMD silently to escape the
+ * check. This kind of THP just can be PTE mapped. Access to
+ * the corrupted subpage should trigger SIGBUS as expected.
+ */
+ if (unlikely(PageHasHWPoisoned(page)))
+ return ret;
+
+ /*
* Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that.
*/
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 831340e7ad8b..989f35a2bbb1 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
struct task_struct *task;
struct task_struct *p;
unsigned int f_flags;
- bool reap = true;
+ bool reap = false;
struct pid *pid;
long ret = 0;
@@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task;
}
- mm = p->mm;
- mmgrab(mm);
-
- /* If the work has been done already, just exit with success */
- if (test_bit(MMF_OOM_SKIP, &mm->flags))
- reap = false;
- else if (!task_will_free_mem(p)) {
- reap = false;
- ret = -EINVAL;
+ if (mmget_not_zero(p->mm)) {
+ mm = p->mm;
+ if (task_will_free_mem(p))
+ reap = true;
+ else {
+ /* Error only if the work has not been done already */
+ if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+ ret = -EINVAL;
+ }
}
task_unlock(p);
@@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
mmap_read_unlock(mm);
drop_mm:
- mmdrop(mm);
+ if (mm)
+ mmput(mm);
put_task:
put_task_struct(task);
put_pid:
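Note: the oom_kill.c change switches process_mrelease() from mmgrab()/mmdrop() to mmget_not_zero()/mmput() and only reports an error when reaping was neither possible nor already done. Below is a minimal userspace caller of the syscall, assuming a Linux 5.15+ kernel; the syscall numbers are defined by hand (the values shown are the x86-64 ones) in case the libc headers predate them.

/*
 * Minimal userspace caller for process_mrelease(). Assumes Linux >= 5.15;
 * syscall numbers are defined manually for older libc headers (x86-64 values).
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_pidfd_open
#define __NR_pidfd_open         434
#endif
#ifndef __NR_process_mrelease
#define __NR_process_mrelease   448
#endif

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <pid of a killed process>\n", argv[0]);
                return 1;
        }

        int pidfd = syscall(__NR_pidfd_open, atoi(argv[1]), 0);
        if (pidfd < 0) {
                perror("pidfd_open");
                return 1;
        }

        /*
         * Reaps the address space of an already-dying (e.g. SIGKILLed)
         * process; typically fails with EINVAL if the task is not exiting.
         */
        if (syscall(__NR_process_mrelease, pidfd, 0) < 0) {
                perror("process_mrelease");
                return 1;
        }

        puts("address space reaped");
        return 0;
}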
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37435c274cf..23d3339ac4e8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
- if (compound)
+ if (compound) {
ClearPageDoubleMap(page);
+ ClearPageHasHWPoisoned(page);
+ }
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
@@ -5223,6 +5225,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(page_array && nr_pages - nr_populated == 0))
goto out;
+ /* Bulk allocator does not support memcg accounting. */
+ if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
+ goto failed;
+
/* Use the single page allocator for one page. */
if (nr_pages - nr_populated == 1)
goto failed;
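Note: taken together, the ClearPageHasHWPoisoned/SetPageHasHWPoisoned changes in huge_memory.c, memory-failure.c, memory.c and page_alloc.c above add a compound-page flag recording that some subpage is hardware-poisoned: it is set in memory_failure() after the refcount is taken, cleared on THP split and on free, and checked in do_set_pmd() so a poisoned subpage is never silently mapped by a PMD. The toy userspace model below only illustrates that fault-path decision; the struct and names are stand-ins, not kernel code.

/*
 * Toy model (not kernel code): once a subpage of a THP is marked
 * poisoned, the fault path refuses the PMD mapping and falls back to
 * PTE mappings, so touching the bad subpage still raises SIGBUS.
 */
#include <stdbool.h>
#include <stdio.h>

struct thp {
        bool has_hwpoisoned;    /* models PageHasHWPoisoned() on the head page */
};

enum map_result { MAP_PMD, MAP_PTE_FALLBACK };

/* Models the do_set_pmd() check: back off if any subpage is corrupted. */
static enum map_result try_map_pmd(const struct thp *page)
{
        if (page->has_hwpoisoned)
                return MAP_PTE_FALLBACK;
        return MAP_PMD;
}

int main(void)
{
        struct thp clean = { .has_hwpoisoned = false };
        struct thp poisoned = { .has_hwpoisoned = true };

        printf("clean THP    -> %s\n",
               try_map_pmd(&clean) == MAP_PMD ? "PMD mapped" : "PTE fallback");
        printf("poisoned THP -> %s\n",
               try_map_pmd(&poisoned) == MAP_PMD ? "PMD mapped" : "PTE fallback");
        return 0;
}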
diff --git a/mm/secretmem.c b/mm/secretmem.c
index c2dda408bb36..22b310adb53d 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
file->f_flags |= O_LARGEFILE;
- fd_install(fd, file);
atomic_inc(&secretmem_users);
+ fd_install(fd, file);
return fd;
err_put_fd:
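Note: the secretmem.c change installs the fd only after the secretmem_users count has been bumped. From userspace the interface is the memfd_secret() syscall; the sketch below assumes Linux 5.14+ with secretmem available (some kernels additionally require the secretmem.enable=1 boot parameter) and defines the x86-64 syscall number by hand in case the libc headers lack it.

/*
 * Minimal userspace sketch of the memfd_secret() interface touched above.
 * Assumes Linux >= 5.14 with secretmem available; 447 is the x86-64
 * syscall number, defined manually for older libc headers.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_memfd_secret
#define __NR_memfd_secret 447
#endif

int main(void)
{
        int fd = syscall(__NR_memfd_secret, 0);
        if (fd < 0) {
                perror("memfd_secret");
                return 1;
        }

        long page = sysconf(_SC_PAGESIZE);
        if (ftruncate(fd, page) < 0) {
                perror("ftruncate");
                return 1;
        }

        /* The backing pages are removed from the kernel's direct map. */
        char *p = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        strcpy(p, "only this mapping can see me");
        puts(p);

        munmap(p, page);
        close(fd);
        return 0;
}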
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d77830ff604c..e8a807c78110 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
unsigned int order, unsigned int nr_pages, struct page **pages)
{
unsigned int nr_allocated = 0;
+ struct page *page;
+ int i;
/*
* For order-0 pages we make use of bulk allocator, if
@@ -2823,7 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* to fails, fallback to a single page allocator that is
* more permissive.
*/
- if (!order) {
+ if (!order && nid != NUMA_NO_NODE) {
while (nr_allocated < nr_pages) {
unsigned int nr, nr_pages_request;
@@ -2848,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
if (nr != nr_pages_request)
break;
}
- } else
+ } else if (order)
/*
* Compound pages required for remap_vmalloc_page if
* high-order pages.
@@ -2856,11 +2858,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
gfp |= __GFP_COMP;
/* High-order pages or fallback path if "bulk" fails. */
- while (nr_allocated < nr_pages) {
- struct page *page;
- int i;
- page = alloc_pages_node(nid, gfp, order);
+ while (nr_allocated < nr_pages) {
+ if (nid == NUMA_NO_NODE)
+ page = alloc_pages(gfp, order);
+ else
+ page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
break;