summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/gup.c2
-rw-r--r--mm/huge_memory.c2
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/init-mm.c11
-rw-r--r--mm/memblock.c6
-rw-r--r--mm/memcontrol.c17
-rw-r--r--mm/memfd.c2
-rw-r--r--mm/memory.c31
-rw-r--r--mm/mempolicy.c1
-rw-r--r--mm/mmap.c67
-rw-r--r--mm/nommu.c12
-rw-r--r--mm/page_alloc.c24
-rw-r--r--mm/rmap.c8
-rw-r--r--mm/shmem.c50
-rw-r--r--mm/zswap.c9
15 files changed, 136 insertions, 113 deletions
diff --git a/mm/gup.c b/mm/gup.c
index b70d7ba7cc13..fc5f98069f4e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1238,8 +1238,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
int locked = 0;
long ret = 0;
- VM_BUG_ON(start & ~PAGE_MASK);
- VM_BUG_ON(len != PAGE_ALIGN(len));
end = start + len;
for (nstart = start; nstart < end; nstart = nend) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1cd7c1a57a14..25346bd99364 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2084,6 +2084,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (vma_is_dax(vma))
return;
page = pmd_page(_pmd);
+ if (!PageDirty(page) && pmd_dirty(_pmd))
+ set_page_dirty(page);
if (!PageReferenced(page) && pmd_young(_pmd))
SetPageReferenced(page);
page_remove_rmap(page, true);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 039ddbc574e9..3103099f64fd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3167,6 +3167,13 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
return 0;
}
+/*
+ * When a new function is introduced to vm_operations_struct and added
+ * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops.
+ * This is because under System V memory model, mappings created via
+ * shmget/shmat with "huge page" specified are backed by hugetlbfs files,
+ * their original vm_ops are overwritten with shm_vm_ops.
+ */
const struct vm_operations_struct hugetlb_vm_ops = {
.fault = hugetlb_vm_op_fault,
.open = hugetlb_vm_op_open,
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f0179c9c04c2..a787a319211e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
#define INIT_MM_CONTEXT(name)
#endif
+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on the maximum CPU
+ * number the system can see. That way we allocate only as much memory for
+ * mm_cpumask() as needed for the hundreds, or thousands of processes that
+ * a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
+ .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
INIT_MM_CONTEXT(init_mm)
};
diff --git a/mm/memblock.c b/mm/memblock.c
index 03d48d8835ba..4b5d245fafc1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,6 +20,7 @@
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
+#include <linux/bootmem.h>
#include <asm/sections.h>
#include <linux/io.h>
@@ -227,7 +228,8 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
* so we use WARN_ONCE() here to see the stack trace if
* fail happens.
*/
- WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n");
+ WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
+ "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
}
return __memblock_find_range_top_down(start, end, size, align, nid,
@@ -1224,6 +1226,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}
+#if defined(CONFIG_NO_BOOTMEM)
/**
* memblock_virt_alloc_internal - allocate boot memory block
* @size: size of memory block to be allocated in bytes
@@ -1431,6 +1434,7 @@ void * __init memblock_virt_alloc_try_nid(
(u64)max_addr);
return NULL;
}
+#endif
/**
* __memblock_free_early - free boot memory block
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e6f0d5ef320a..b2173f7e5164 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -850,7 +850,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
int nid;
int i;
- while ((memcg = parent_mem_cgroup(memcg))) {
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
for_each_node(nid) {
mz = mem_cgroup_nodeinfo(memcg, nid);
for (i = 0; i <= DEF_PRIORITY; i++) {
@@ -4037,6 +4037,14 @@ static struct cftype mem_cgroup_legacy_files[] = {
static DEFINE_IDR(mem_cgroup_idr);
+static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
+{
+ if (memcg->id.id > 0) {
+ idr_remove(&mem_cgroup_idr, memcg->id.id);
+ memcg->id.id = 0;
+ }
+}
+
static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
{
VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
@@ -4047,8 +4055,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
{
VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
if (atomic_sub_and_test(n, &memcg->id.ref)) {
- idr_remove(&mem_cgroup_idr, memcg->id.id);
- memcg->id.id = 0;
+ mem_cgroup_id_remove(memcg);
/* Memcg ID pins CSS */
css_put(&memcg->css);
@@ -4185,8 +4192,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
return memcg;
fail:
- if (memcg->id.id > 0)
- idr_remove(&mem_cgroup_idr, memcg->id.id);
+ mem_cgroup_id_remove(memcg);
__mem_cgroup_free(memcg);
return NULL;
}
@@ -4245,6 +4251,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
return &memcg->css;
fail:
+ mem_cgroup_id_remove(memcg);
mem_cgroup_free(memcg);
return ERR_PTR(-ENOMEM);
}
diff --git a/mm/memfd.c b/mm/memfd.c
index 27069518e3c5..2bb5e257080e 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -326,7 +326,7 @@ SYSCALL_DEFINE2(memfd_create,
goto err_fd;
}
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
- file->f_flags |= O_RDWR | O_LARGEFILE;
+ file->f_flags |= O_LARGEFILE;
if (flags & MFD_ALLOW_SEALING) {
file_seals = memfd_file_seals_ptr(file);
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..3d0a74ab70f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-/*
- * See the comment near struct mmu_table_batch.
- */
-
static void tlb_remove_table_smp_sync(void *arg)
{
- /* Simply deliver the interrupt */
+ struct mm_struct __maybe_unused *mm = arg;
+ /*
+ * On most architectures this does nothing. Simply delivering the
+ * interrupt is enough to prevent races with software page table
+ * walking like that done in get_user_pages_fast.
+ *
+ * See the comment near struct mmu_table_batch.
+ */
+ tlb_flush_remove_tables_local(mm);
}
-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
* It is however sufficient for software page-table walkers that rely on
* IRQ disabling. See the comment near struct mmu_table_batch.
*/
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
__tlb_remove_table(table);
}
@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
{
struct mmu_table_batch **batch = &tlb->batch;
+ tlb_flush_remove_tables(tlb->mm);
+
if (*batch) {
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
- tlb_remove_table_one(table);
+ tlb_remove_table_one(table, tlb);
return;
}
(*batch)->nr = 0;
@@ -1417,11 +1423,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
do {
next = pmd_addr_end(addr, end);
if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
- if (next - addr != HPAGE_PMD_SIZE) {
- VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
- !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+ if (next - addr != HPAGE_PMD_SIZE)
__split_huge_pmd(vma, pmd, addr, false, NULL);
- } else if (zap_huge_pmd(tlb, vma, pmd, addr))
+ else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
}
@@ -4397,6 +4401,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
return -EINVAL;
maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+ if (!maddr)
+ return -ENOMEM;
+
if (write)
memcpy_toio(maddr + offset, buf, len);
else
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9ac49ef17b4e..01f1a14facc4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2505,6 +2505,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
/* Create pseudo-vma that contains just the policy */
memset(&pvma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pvma, NULL);
pvma.vm_end = TASK_SIZE; /* policy covers entire file */
mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
diff --git a/mm/mmap.c b/mm/mmap.c
index d1eb87ef4b1a..17bbf4d3e24f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -182,12 +182,12 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
if (vma->vm_file)
fput(vma->vm_file);
mpol_put(vma_policy(vma));
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
return next;
}
-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
-
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
+ struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
unsigned long retval;
@@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
goto out;
/* Ok, looks good - let it rip. */
- if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
+ if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
goto out;
set_brk:
@@ -911,7 +911,7 @@ again:
anon_vma_merge(vma, next);
mm->map_count--;
mpol_put(vma_policy(next));
- kmem_cache_free(vm_area_cachep, next);
+ vm_area_free(next);
/*
* In mprotect's case 6 (see comments on vma_merge),
* we must remove another next too. It would clutter
@@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ vma = vm_area_alloc(mm);
if (!vma) {
error = -ENOMEM;
goto unacct_error;
}
- vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags;
vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
- INIT_LIST_HEAD(&vma->anon_vma_chain);
if (file) {
if (vm_flags & VM_DENYWRITE) {
@@ -1780,6 +1778,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
error = shmem_zero_setup(vma);
if (error)
goto free_vma;
+ } else {
+ vma_set_anonymous(vma);
}
vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -1832,7 +1832,7 @@ allow_write_and_free_vma:
if (vm_flags & VM_DENYWRITE)
allow_write_access(file);
free_vma:
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
unacct_error:
if (charged)
vm_unacct_memory(charged);
@@ -2620,15 +2620,10 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return err;
}
- new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ new = vm_area_dup(vma);
if (!new)
return -ENOMEM;
- /* most fields are the same, copy all, and then fixup */
- *new = *vma;
-
- INIT_LIST_HEAD(&new->anon_vma_chain);
-
if (new_below)
new->vm_end = addr;
else {
@@ -2669,7 +2664,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
out_free_mpol:
mpol_put(vma_policy(new));
out_free_vma:
- kmem_cache_free(vm_area_cachep, new);
+ vm_area_free(new);
return err;
}
@@ -2929,21 +2924,14 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
* anonymous maps. eventually we may be able to do some
* brk-specific accounting here.
*/
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
+static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
- unsigned long len;
struct rb_node **rb_link, *rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
- len = PAGE_ALIGN(request);
- if (len < request)
- return -ENOMEM;
- if (!len)
- return 0;
-
/* Until we need other flags, refuse anything except VM_EXEC. */
if ((flags & (~VM_EXEC)) != 0)
return -EINVAL;
@@ -2991,14 +2979,13 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
/*
* create a vma struct for an anonymous mapping
*/
- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ vma = vm_area_alloc(mm);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
- INIT_LIST_HEAD(&vma->anon_vma_chain);
- vma->vm_mm = mm;
+ vma_set_anonymous(vma);
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
@@ -3015,18 +3002,20 @@ out:
return 0;
}
-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
-{
- return do_brk_flags(addr, len, 0, uf);
-}
-
-int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
+int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ unsigned long len;
int ret;
bool populate;
LIST_HEAD(uf);
+ len = PAGE_ALIGN(request);
+ if (len < request)
+ return -ENOMEM;
+ if (!len)
+ return 0;
+
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
@@ -3207,16 +3196,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
}
*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
} else {
- new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ new_vma = vm_area_dup(vma);
if (!new_vma)
goto out;
- *new_vma = *vma;
new_vma->vm_start = addr;
new_vma->vm_end = addr + len;
new_vma->vm_pgoff = pgoff;
if (vma_dup_policy(vma, new_vma))
goto out_free_vma;
- INIT_LIST_HEAD(&new_vma->anon_vma_chain);
if (anon_vma_clone(new_vma, vma))
goto out_free_mempol;
if (new_vma->vm_file)
@@ -3231,7 +3218,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
out_free_mempol:
mpol_put(vma_policy(new_vma));
out_free_vma:
- kmem_cache_free(vm_area_cachep, new_vma);
+ vm_area_free(new_vma);
out:
return NULL;
}
@@ -3355,12 +3342,10 @@ static struct vm_area_struct *__install_special_mapping(
int ret;
struct vm_area_struct *vma;
- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ vma = vm_area_alloc(mm);
if (unlikely(vma == NULL))
return ERR_PTR(-ENOMEM);
- INIT_LIST_HEAD(&vma->anon_vma_chain);
- vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
@@ -3381,7 +3366,7 @@ static struct vm_area_struct *__install_special_mapping(
return vma;
out:
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
return ERR_PTR(ret);
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 4452d8bd9ae4..9fc9e43335b6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -769,7 +769,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
if (vma->vm_file)
fput(vma->vm_file);
put_nommu_region(vma->vm_region);
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
}
/*
@@ -1145,6 +1145,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
if (ret < len)
memset(base + ret, 0, len - ret);
+ } else {
+ vma_set_anonymous(vma);
}
return 0;
@@ -1204,7 +1206,7 @@ unsigned long do_mmap(struct file *file,
if (!region)
goto error_getting_region;
- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ vma = vm_area_alloc(current->mm);
if (!vma)
goto error_getting_vma;
@@ -1212,7 +1214,6 @@ unsigned long do_mmap(struct file *file,
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
- INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_flags = vm_flags;
vma->vm_pgoff = pgoff;
@@ -1368,7 +1369,7 @@ error:
kmem_cache_free(vm_region_jar, region);
if (vma->vm_file)
fput(vma->vm_file);
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
return ret;
sharing_violation:
@@ -1469,14 +1470,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
if (!region)
return -ENOMEM;
- new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ new = vm_area_dup(vma);
if (!new) {
kmem_cache_free(vm_region_jar, region);
return -ENOMEM;
}
/* most fields are the same, copy all, and then fixup */
- *new = *vma;
*region = *vma->vm_region;
new->vm_region = region;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1521100f1e63..3222193c46c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6383,7 +6383,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
free_area_init_core(pgdat);
}
-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
/*
* Only struct pages that are backed by physical memory are zeroed and
* initialized by going through __init_single_page(). But, there are some
@@ -6421,7 +6421,7 @@ void __paginginit zero_resv_unavail(void)
if (pgcnt)
pr_info("Reserved but unavailable: %lld pages", pgcnt);
}
-#endif /* CONFIG_HAVE_MEMBLOCK */
+#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
@@ -6847,6 +6847,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
+ zero_resv_unavail();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, NULL,
@@ -6857,7 +6858,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
node_set_state(nid, N_MEMORY);
check_for_memory(pgdat, nid);
}
- zero_resv_unavail();
}
static int __init cmdline_parse_core(char *p, unsigned long *core,
@@ -6939,9 +6939,21 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
start = (void *)PAGE_ALIGN((unsigned long)start);
end = (void *)((unsigned long)end & PAGE_MASK);
for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+ struct page *page = virt_to_page(pos);
+ void *direct_map_addr;
+
+ /*
+ * 'direct_map_addr' might be different from 'pos'
+ * because some architectures' virt_to_page()
+ * work with aliases. Getting the direct map
+ * address ensures that we get a _writeable_
+ * alias for the memset().
+ */
+ direct_map_addr = page_address(page);
if ((unsigned int)poison <= 0xFF)
- memset(pos, poison, PAGE_SIZE);
- free_reserved_page(virt_to_page(pos));
+ memset(direct_map_addr, poison, PAGE_SIZE);
+
+ free_reserved_page(page);
}
if (pages && s)
@@ -7033,9 +7045,9 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
void __init free_area_init(unsigned long *zones_size)
{
+ zero_resv_unavail();
free_area_init_node(0, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
- zero_resv_unavail();
}
static int page_alloc_cpu_dead(unsigned int cpu)
diff --git a/mm/rmap.c b/mm/rmap.c
index 6db729dc4c50..eb477809a5c0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -64,6 +64,7 @@
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
+#include <linux/userfaultfd_k.h>
#include <asm/tlbflush.h>
@@ -1481,11 +1482,16 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
set_pte_at(mm, address, pvmw.pte, pteval);
}
- } else if (pte_unused(pteval)) {
+ } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
/*
* The guest indicated that the page content is of no
* interest anymore. Simply discard the pte, vmscan
* will take care of the rest.
+ * A future reference will then fault in a new zero
+ * page. When userfaultfd is active, we must not drop
+ * this page though, as its main user (postcopy
+ * migration) will not expect userfaults on already
+ * copied pages.
*/
dec_mm_counter(mm, mm_counter(page));
/* We have to invalidate as we cleared the pte */
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..96bcc51fb9ec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1421,6 +1421,7 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
{
/* Create a pseudo vma that just contains the policy */
memset(vma, 0, sizeof(*vma));
+ vma_init(vma, NULL);
/* Bias interleave by inode number to distribute better across nodes */
vma->vm_pgoff = index + info->vfs_inode.i_ino;
vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
@@ -3896,18 +3897,11 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
/* common code */
-static const struct dentry_operations anon_ops = {
- .d_dname = simple_dname
-};
-
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
unsigned long flags, unsigned int i_flags)
{
- struct file *res;
struct inode *inode;
- struct path path;
- struct super_block *sb;
- struct qstr this;
+ struct file *res;
if (IS_ERR(mnt))
return ERR_CAST(mnt);
@@ -3918,41 +3912,21 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
if (shmem_acct_size(flags, size))
return ERR_PTR(-ENOMEM);
- res = ERR_PTR(-ENOMEM);
- this.name = name;
- this.len = strlen(name);
- this.hash = 0; /* will go */
- sb = mnt->mnt_sb;
- path.mnt = mntget(mnt);
- path.dentry = d_alloc_pseudo(sb, &this);
- if (!path.dentry)
- goto put_memory;
- d_set_d_op(path.dentry, &anon_ops);
-
- res = ERR_PTR(-ENOSPC);
- inode = shmem_get_inode(sb, NULL, S_IFREG | 0777, 0, flags);
- if (!inode)
- goto put_memory;
-
+ inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
+ flags);
+ if (unlikely(!inode)) {
+ shmem_unacct_size(flags, size);
+ return ERR_PTR(-ENOSPC);
+ }
inode->i_flags |= i_flags;
- d_instantiate(path.dentry, inode);
inode->i_size = size;
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
+ if (!IS_ERR(res))
+ res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+ &shmem_file_operations);
if (IS_ERR(res))
- goto put_path;
-
- res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
- &shmem_file_operations);
- if (IS_ERR(res))
- goto put_path;
-
- return res;
-
-put_memory:
- shmem_unacct_size(flags, size);
-put_path:
- path_put(&path);
+ iput(inode);
return res;
}
diff --git a/mm/zswap.c b/mm/zswap.c
index 7d34e69507e3..cd91fd9d96b8 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1026,6 +1026,15 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
ret = -ENOMEM;
goto reject;
}
+
+ /* A second zswap_is_full() check after
+ * zswap_shrink() to make sure it's now
+ * under the max_pool_percent
+ */
+ if (zswap_is_full()) {
+ ret = -ENOMEM;
+ goto reject;
+ }
}
/* allocate entry */