Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  142
1 file changed, 90 insertions(+), 52 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index f609e9ec4a25..bdd19f5b994e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -132,7 +132,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
vm_flags &= ~VM_SHARED;
vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
}
- /* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
+ /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
bool downgraded = false;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
origbrk = mm->brk;
@@ -238,14 +238,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/*
* Always allow shrinking brk.
- * __do_munmap() may downgrade mmap_sem to read.
+ * __do_munmap() may downgrade mmap_lock to read.
*/
if (brk <= mm->brk) {
int ret;
/*
- * mm->brk must to be protected by write mmap_sem so update it
- * before downgrading mmap_sem. When __do_munmap() fails,
+ * mm->brk must be protected by write mmap_lock so update it
+ * before downgrading mmap_lock. When __do_munmap() fails,
* mm->brk will be restored from origbrk.
*/
mm->brk = brk;
@@ -272,9 +272,9 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
success:
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
if (downgraded)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
else
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
@@ -282,7 +282,7 @@ success:
out:
retval = origbrk;
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return retval;
}
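
The brk() hunks above follow the new mmap locking API throughout: mmap_write_lock_killable() replaces down_write_killable(&mm->mmap_sem), mmap_write_downgrade() replaces downgrade_write(), and the matching unlock depends on whether the lock was downgraded. A minimal sketch of that pattern, assuming a hypothetical example_locked_op() helper (not part of this patch):

#include <linux/errno.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>

/* Hypothetical helper illustrating the lock/downgrade/unlock pattern. */
static int example_locked_op(struct mm_struct *mm, bool can_downgrade)
{
	bool downgraded = false;

	if (mmap_write_lock_killable(mm))	/* interruptible, as in brk() */
		return -EINTR;

	/* ... updates that require the write lock ... */

	if (can_downgrade) {
		/* the remaining work only reads the VMA tree */
		mmap_write_downgrade(mm);
		downgraded = true;
	}

	/* ... work that is safe under the read lock ... */

	if (downgraded)
		mmap_read_unlock(mm);
	else
		mmap_write_unlock(mm);
	return 0;
}
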
@@ -505,7 +505,7 @@ static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
* After the update, the vma will be reinserted using
* anon_vma_interval_tree_post_update_vma().
*
- * The entire update must be protected by exclusive mmap_sem and by
+ * The entire update must be protected by exclusive mmap_lock and by
* the root anon_vma's mutex.
*/
static inline void
@@ -1030,7 +1030,7 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
* anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
*
* We don't check here for the merged mmap wrapping around the end of pagecache
- * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * indices (16TB on ia32) because do_mmap() does not permit mmap's which
* wrap, nor mmaps which cover the final page at index -1UL.
*/
static int
@@ -1207,7 +1207,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
}
/*
- * Rough compatbility check to quickly see if it's even worth looking
+ * Rough compatibility check to quickly see if it's even worth looking
* at sharing an anon_vma.
*
* They need to have the same vm_file, and the flags can only differ
@@ -1361,15 +1361,15 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
}
/*
- * The caller must hold down_write(&current->mm->mmap_sem).
+ * The caller must write-lock current->mm->mmap_lock.
*/
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, vm_flags_t vm_flags,
- unsigned long pgoff, unsigned long *populate,
- struct list_head *uf)
+ unsigned long flags, unsigned long pgoff,
+ unsigned long *populate, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
+ vm_flags_t vm_flags;
int pkey = 0;
*populate = 0;
@@ -1431,7 +1431,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+ vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
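
With vm_flags dropped from do_mmap()'s parameter list and computed internally from prot and flags, former do_mmap_pgoff() callers switch to do_mmap() directly (see the remap_file_pages() hunk further down). A hedged sketch of a caller against the new eight-argument signature; example_map_file() and its surrounding error handling are hypothetical:

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/userfaultfd_k.h>

/* Hypothetical in-kernel caller of the new do_mmap() signature. */
static unsigned long example_map_file(struct file *file, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	unsigned long populate = 0;
	unsigned long addr;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;
	/* no vm_flags argument any more; pgoff is 0 for this mapping */
	addr = do_mmap(file, 0, len, PROT_READ, MAP_PRIVATE, 0,
		       &populate, &uf);
	mmap_write_unlock(mm);

	userfaultfd_unmap_complete(mm, &uf);
	if (populate && !IS_ERR_VALUE(addr))
		mm_populate(addr, populate);
	return addr;
}
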
@@ -1562,11 +1562,12 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
file = fget(fd);
if (!file)
return -EBADF;
- if (is_file_hugepages(file))
+ if (is_file_hugepages(file)) {
len = ALIGN(len, huge_page_size(hstate_file(file)));
- retval = -EINVAL;
- if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+ } else if (unlikely(flags & MAP_HUGETLB)) {
+ retval = -EINVAL;
goto out_fput;
+ }
} else if (flags & MAP_HUGETLB) {
struct user_struct *user = NULL;
struct hstate *hs;
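
The restructured check above keeps the existing user-visible behaviour: a hugetlbfs fd gets its length aligned to the huge page size, while MAP_HUGETLB combined with an ordinary fd fails with EINVAL. A hedged userspace illustration of the rejected case (the file path is arbitrary):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);	/* any regular, non-hugetlbfs file */
	void *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 2 * 1024 * 1024, PROT_READ,
		 MAP_PRIVATE | MAP_HUGETLB, fd, 0);
	if (p == MAP_FAILED)
		perror("mmap");		/* expected to fail with EINVAL */
	else
		munmap(p, 2 * 1024 * 1024);
	return 0;
}
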
@@ -1689,7 +1690,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
struct list_head *uf)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *prev;
+ struct vm_area_struct *vma, *prev, *merge;
int error;
struct rb_node **rb_link, *rb_parent;
unsigned long charged = 0;
@@ -1773,6 +1774,29 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (error)
goto unmap_and_free_vma;
+ /* If vm_flags changed after call_mmap(), we should try to merge the vma again
+ * as we may succeed this time.
+ */
+ if (unlikely(vm_flags != vma->vm_flags && prev)) {
+ merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
+ NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
+ if (merge) {
+ /* ->mmap() can change vma->vm_file and fput the original file, so
+ * fput vma->vm_file here, or we would add an extra fput for the file
+ * and ultimately cause a general protection fault.
+ */
+ fput(vma->vm_file);
+ vm_area_free(vma);
+ vma = merge;
+ /* Update vm_flags and possibly addr to pick up the change. We don't
+ * warn here if addr changed as the vma is not linked by vma_link().
+ */
+ addr = vma->vm_start;
+ vm_flags = vma->vm_flags;
+ goto unmap_writable;
+ }
+ }
+
/* Can addr have changed??
*
* Answer: Yes, several device drivers can do it in their
@@ -1795,6 +1819,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma_link(mm, vma, prev, rb_link, rb_parent);
/* Once vma denies write, undo our temporary denial count */
if (file) {
+unmap_writable:
if (vm_flags & VM_SHARED)
mapping_unmap_writable(file->f_mapping);
if (vm_flags & VM_DENYWRITE)
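
The re-merge added above exists because call_mmap() hands control to the driver's ->mmap(), which may adjust vma->vm_flags after mmap_region()'s first vma_merge() attempt has already failed. A hedged sketch of the kind of driver hook that triggers it; exampledrv_mmap() is hypothetical, and VM_DONTEXPAND | VM_DONTDUMP is just a commonly seen combination:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

/* Hypothetical driver ->mmap(): flag changes made here are only visible
 * to mmap_region() after call_mmap() returns, hence the retry above. */
static int exampledrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	return 0;
}

static const struct file_operations exampledrv_fops = {
	.owner = THIS_MODULE,
	.mmap  = exampledrv_mmap,
};
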
@@ -2209,7 +2234,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
- * do_mmap_pgoff() will clear pgoff, so match alignment.
+ * do_mmap() will clear pgoff, so match alignment.
*/
pgoff = 0;
get_area = shmem_get_unmapped_area;
@@ -2371,7 +2396,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/*
* vma->vm_start/vm_end cannot change under us because the caller
- * is required to hold the mmap_sem in read mode. We need the
+ * is required to hold the mmap_lock in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
anon_vma_lock_write(vma->anon_vma);
@@ -2389,7 +2414,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
if (!error) {
/*
* vma_gap_update() doesn't support concurrent
- * updates, but we only hold a shared mmap_sem
+ * updates, but we only hold a shared mmap_lock
* lock here, so we need to protect against
* concurrent vma expansions.
* anon_vma_lock_write() doesn't help here, as
@@ -2451,7 +2476,7 @@ int expand_downwards(struct vm_area_struct *vma,
/*
* vma->vm_start/vm_end cannot change under us because the caller
- * is required to hold the mmap_sem in read mode. We need the
+ * is required to hold the mmap_lock in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
anon_vma_lock_write(vma->anon_vma);
@@ -2469,7 +2494,7 @@ int expand_downwards(struct vm_area_struct *vma,
if (!error) {
/*
* vma_gap_update() doesn't support concurrent
- * updates, but we only hold a shared mmap_sem
+ * updates, but we only hold a shared mmap_lock
* lock here, so we need to protect against
* concurrent vma expansions.
* anon_vma_lock_write() doesn't help here, as
@@ -2620,7 +2645,7 @@ static void unmap_region(struct mm_struct *mm,
* Create a list of vma's touched by the unmap, removing them from the mm's
* vma list as we go..
*/
-static void
+static bool
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, unsigned long end)
{
@@ -2645,6 +2670,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
/* Kill the cache */
vmacache_invalidate(mm);
+
+ /*
+ * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+ * VM_GROWSUP VMA. Such VMAs can change their size under
+ * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+ */
+ if (vma && (vma->vm_flags & VM_GROWSDOWN))
+ return false;
+ if (prev && (prev->vm_flags & VM_GROWSUP))
+ return false;
+ return true;
}
/*
@@ -2825,10 +2861,11 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
}
/* Detach vmas from rbtree */
- detach_vmas_to_be_unmapped(mm, vma, prev, end);
+ if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
+ downgrade = false;
if (downgrade)
- downgrade_write(&mm->mmap_sem);
+ mmap_write_downgrade(mm);
unmap_region(mm, vma, prev, start, end);
@@ -2850,20 +2887,20 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = __do_munmap(mm, start, len, &uf, downgrade);
/*
- * Returning 1 indicates mmap_sem is downgraded.
+ * Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
* it to 0 before return.
*/
if (ret == 1) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
ret = 0;
} else
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
return ret;
@@ -2911,7 +2948,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (pgoff + (size >> PAGE_SHIFT) < pgoff)
return ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
vma = find_vma(mm, start);
@@ -2970,11 +3007,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
}
file = get_file(vma->vm_file);
- ret = do_mmap_pgoff(vma->vm_file, start, size,
+ ret = do_mmap(vma->vm_file, start, size,
prot, flags, pgoff, &populate, NULL);
fput(file);
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
if (populate)
mm_populate(ret, populate);
if (!IS_ERR_VALUE(ret))
@@ -3074,12 +3111,12 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (!len)
return 0;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = do_brk_flags(addr, len, flags, &uf);
populate = ((mm->def_flags & VM_LOCKED) != 0);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate && !ret)
mm_populate(addr, len);
@@ -3107,12 +3144,12 @@ void exit_mmap(struct mm_struct *mm)
/*
* Manually reap the mm to free as much memory as possible.
* Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
- * this mm from further consideration. Taking mm->mmap_sem for
+ * this mm from further consideration. Taking mm->mmap_lock for
* write after setting MMF_OOM_SKIP will guarantee that the oom
- * reaper will not run on this mm again after mmap_sem is
+ * reaper will not run on this mm again after mmap_lock is
* dropped.
*
- * Nothing can be holding mm->mmap_sem here and the above call
+ * Nothing can be holding mm->mmap_lock here and the above call
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
*
@@ -3123,8 +3160,8 @@ void exit_mmap(struct mm_struct *mm)
(void)__oom_reap_task_mm(mm);
set_bit(MMF_OOM_SKIP, &mm->flags);
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+ mmap_write_unlock(mm);
}
if (mm->locked_vm) {
@@ -3159,6 +3196,7 @@ void exit_mmap(struct mm_struct *mm)
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += vma_pages(vma);
vma = remove_vma(vma);
+ cond_resched();
}
vm_unacct_memory(nr_accounted);
}
@@ -3189,7 +3227,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
* By setting it to reflect the virtual start address of the
* vma, merges and splits can happen in a seamless way, just
* using the existing file pgoff checks and manipulations.
- * Similarly in do_mmap_pgoff and in do_brk.
+ * Similarly in do_mmap and in do_brk.
*/
if (vma_is_anonymous(vma)) {
BUG_ON(vma->anon_vma);
@@ -3437,7 +3475,7 @@ bool vma_is_special_mapping(const struct vm_area_struct *vma,
}
/*
- * Called with mm->mmap_sem held for writing.
+ * Called with mm->mmap_lock held for writing.
* Insert a new vma covering the given region, with the given flags.
* Its pages are supplied by the given array of struct page *.
* The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
@@ -3474,7 +3512,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
+ down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
/*
* We can safely modify head.next after taking the
* anon_vma->root->rwsem. If some other vma in this mm shares
@@ -3504,7 +3542,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
*/
if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
BUG();
- down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
+ down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
}
}
@@ -3513,11 +3551,11 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
* operations that could ever happen on a certain mm. This includes
* vmtruncate, try_to_unmap, and all page faults.
*
- * The caller must take the mmap_sem in write mode before calling
+ * The caller must take the mmap_lock in write mode before calling
* mm_take_all_locks(). The caller isn't allowed to release the
- * mmap_sem until mm_drop_all_locks() returns.
+ * mmap_lock until mm_drop_all_locks() returns.
*
- * mmap_sem in write mode is required in order to block all operations
+ * mmap_lock in write mode is required in order to block all operations
* that could modify pagetables and free pages without need of
* altering the vma layout. It's also needed in write mode to avoid new
* anon_vmas to be associated with existing vmas.
@@ -3550,7 +3588,7 @@ int mm_take_all_locks(struct mm_struct *mm)
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(mm));
mutex_lock(&mm_all_locks_mutex);
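
The converted comments spell out the contract: mm_take_all_locks() must be entered with the mmap_lock already held for writing, and that lock stays held until mm_drop_all_locks() returns. A minimal usage sketch, assuming a hypothetical example_freeze_mm() caller modelled loosely on how mmu-notifier registration drives this API, with error handling trimmed:

#include <linux/mm.h>
#include <linux/mmap_lock.h>

/* Hypothetical caller; holds the write lock across take/drop as required. */
static int example_freeze_mm(struct mm_struct *mm)
{
	int ret;

	mmap_write_lock(mm);		/* required before mm_take_all_locks() */
	ret = mm_take_all_locks(mm);	/* -EINTR if a fatal signal is pending */
	if (ret)
		goto out;

	/* ... every anon_vma and i_mmap lock in this mm is now held ... */

	mm_drop_all_locks(mm);
out:
	mmap_write_unlock(mm);		/* only after mm_drop_all_locks() */
	return ret;
}
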
@@ -3622,7 +3660,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
}
/*
- * The mmap_sem cannot be released by the caller until
+ * The mmap_lock cannot be released by the caller until
* mm_drop_all_locks() returns.
*/
void mm_drop_all_locks(struct mm_struct *mm)
@@ -3630,7 +3668,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(mm));
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
for (vma = mm->mmap; vma; vma = vma->vm_next) {