From 7bba8f0ea428f8a904c022b86e584009a2f87357 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 6 Aug 2020 23:22:59 -0700 Subject: mm/mmap: optimize a branch judgment in ksys_mmap_pgoff() Look at the pseudo code below. It's very clear that, the judgement "!is_file_hugepages(file)" at 3) is duplicated to the one at 1), we can use "else if" to avoid it. And the assignment "retval = -EINVAL" at 2) is only needed by the branch 3), because "retval" will be overwritten at 4). No functional change, but it can reduce the code size. Maybe more clearer? Before: text data bss dec hex filename 28733 1590 1 30324 7674 mm/mmap.o After: text data bss dec hex filename 28701 1590 1 30292 7654 mm/mmap.o ====pseudo code====: if (!(flags & MAP_ANONYMOUS)) { ... 1) if (is_file_hugepages(file)) len = ALIGN(len, huge_page_size(hstate_file(file))); 2) retval = -EINVAL; 3) if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file))) goto out_fput; } else if (flags & MAP_HUGETLB) { ... } ... 4) retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); out_fput: ... return retval; Signed-off-by: Zhen Lei Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200705080112.1405-1-thunder.leizhen@huawei.com Signed-off-by: Linus Torvalds --- mm/mmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index dcdab2675a21..853ae3aebc3d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1562,11 +1562,12 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, file = fget(fd); if (!file) return -EBADF; - if (is_file_hugepages(file)) + if (is_file_hugepages(file)) { len = ALIGN(len, huge_page_size(hstate_file(file))); - retval = -EINVAL; - if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file))) + } else if (unlikely(flags & MAP_HUGETLB)) { + retval = -EINVAL; goto out_fput; + } } else if (flags & MAP_HUGETLB) { struct user_struct *user = NULL; struct hstate *hs; -- cgit v1.2.3 From d70cec8983241a6aafadf78e2d65bbafac87ab6a Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 23:23:34 -0700 Subject: mm: mmap: merge vma after call_mmap() if possible The vm_flags may be changed after call_mmap() because drivers may set some flags for their own purpose. As a result, we failed to merge the adjacent vma due to the different vm_flags as userspace can't pass in the same one. Try to merge vma after call_mmap() to fix this issue. Signed-off-by: Hongxiang Lou Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/1594954065-23733-1-git-send-email-linmiaohe@huawei.com Signed-off-by: Linus Torvalds --- mm/mmap.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 853ae3aebc3d..ba39409cdb17 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1690,7 +1690,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, struct list_head *uf) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev; + struct vm_area_struct *vma, *prev, *merge; int error; struct rb_node **rb_link, *rb_parent; unsigned long charged = 0; @@ -1774,6 +1774,25 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (error) goto unmap_and_free_vma; + /* If vm_flags changed after call_mmap(), we should try merge vma again + * as we may succeed this time. + */ + if (unlikely(vm_flags != vma->vm_flags && prev)) { + merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, + NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX); + if (merge) { + fput(file); + vm_area_free(vma); + vma = merge; + /* Update vm_flags and possible addr to pick up the change. We don't + * warn here if addr changed as the vma is not linked by vma_link(). + */ + addr = vma->vm_start; + vm_flags = vma->vm_flags; + goto unmap_writable; + } + } + /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their @@ -1796,6 +1815,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ if (file) { +unmap_writable: if (vm_flags & VM_SHARED) mapping_unmap_writable(file->f_mapping); if (vm_flags & VM_DENYWRITE) -- cgit v1.2.3 From 45e55300f11495ed58c53427da7f0d958800a30f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 6 Aug 2020 23:23:37 -0700 Subject: mm: remove unnecessary wrapper function do_mmap_pgoff() The current split between do_mmap() and do_mmap_pgoff() was introduced in commit 1fcfd8db7f82 ("mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()") to support MPX. The wrapper function do_mmap_pgoff() always passed 0 as the value of the vm_flags argument to do_mmap(). However, MPX support has subsequently been removed from the kernel and there were no more direct callers of do_mmap(); all calls were going via do_mmap_pgoff(). Simplify the code by removing do_mmap_pgoff() and changing all callers to directly call do_mmap(), which now no longer takes a vm_flags argument. Signed-off-by: Peter Collingbourne Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: David Hildenbrand Link: http://lkml.kernel.org/r/20200727194109.1371462-1-pcc@google.com Signed-off-by: Linus Torvalds --- fs/aio.c | 6 +++--- fs/hugetlbfs/inode.c | 2 +- include/linux/fs.h | 2 +- include/linux/mm.h | 12 +----------- ipc/shm.c | 2 +- mm/mmap.c | 16 ++++++++-------- mm/nommu.c | 6 +++--- mm/shmem.c | 2 +- mm/util.c | 4 ++-- 9 files changed, 21 insertions(+), 31 deletions(-) (limited to 'mm/mmap.c') diff --git a/fs/aio.c b/fs/aio.c index 91e7cc4a9f17..5736bff48e9e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -525,9 +525,9 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) return -EINTR; } - ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, 0, &unused, NULL); + ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, 0, &unused, NULL); mmap_write_unlock(mm); if (IS_ERR((void *)ctx->mmap_base)) { ctx->mmap_size = 0; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ef5313f9c78f..523954d00dff 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -140,7 +140,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * already been checked by prepare_hugepage_range. If you add * any error returns here, do so after setting VM_HUGETLB, so * is_vm_hugetlb_page tests below unmap_region go the right - * way when do_mmap_pgoff unwinds (may be important on powerpc + * way when do_mmap unwinds (may be important on powerpc * and ia64). */ vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; diff --git a/include/linux/fs.h b/include/linux/fs.h index b1c3a14f12e8..f5da33bcaaae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -528,7 +528,7 @@ static inline int mapping_mapped(struct address_space *mapping) /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * diff --git a/include/linux/mm.h b/include/linux/mm.h index 98e35df8d88e..cf7e4605ff3f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2546,23 +2546,13 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, - struct list_head *uf); + unsigned long pgoff, unsigned long *populate, struct list_head *uf); extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(unsigned long start, size_t len_in, int behavior); -static inline unsigned long -do_mmap_pgoff(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, unsigned long *populate, - struct list_head *uf) -{ - return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf); -} - #ifdef CONFIG_MMU extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); diff --git a/ipc/shm.c b/ipc/shm.c index 0a6dd94afa21..bf38d7e2fbe9 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1558,7 +1558,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto invalid; } - addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL); + addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL); *raddr = addr; err = 0; if (IS_ERR_VALUE(addr)) diff --git a/mm/mmap.c b/mm/mmap.c index ba39409cdb17..40248d84ad5f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1030,7 +1030,7 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1, * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. * * We don't check here for the merged mmap wrapping around the end of pagecache - * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which + * indices (16TB on ia32) because do_mmap() does not permit mmap's which * wrap, nor mmaps which cover the final page at index -1UL. */ static int @@ -1365,11 +1365,11 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode, */ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, vm_flags_t vm_flags, - unsigned long pgoff, unsigned long *populate, - struct list_head *uf) + unsigned long flags, unsigned long pgoff, + unsigned long *populate, struct list_head *uf) { struct mm_struct *mm = current->mm; + vm_flags_t vm_flags; int pkey = 0; *populate = 0; @@ -1431,7 +1431,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ - vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | + vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) @@ -2230,7 +2230,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. - * do_mmap_pgoff() will clear pgoff, so match alignment. + * do_mmap() will clear pgoff, so match alignment. */ pgoff = 0; get_area = shmem_get_unmapped_area; @@ -3003,7 +3003,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } file = get_file(vma->vm_file); - ret = do_mmap_pgoff(vma->vm_file, start, size, + ret = do_mmap(vma->vm_file, start, size, prot, flags, pgoff, &populate, NULL); fput(file); out: @@ -3223,7 +3223,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) * By setting it to reflect the virtual start address of the * vma, merges and splits can happen in a seamless way, just * using the existing file pgoff checks and manipulations. - * Similarly in do_mmap_pgoff and in do_brk. + * Similarly in do_mmap and in do_brk. */ if (vma_is_anonymous(vma)) { BUG_ON(vma->anon_vma); diff --git a/mm/nommu.c b/mm/nommu.c index 314174817b04..340ae7774c13 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1078,7 +1078,6 @@ unsigned long do_mmap(struct file *file, unsigned long len, unsigned long prot, unsigned long flags, - vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) @@ -1086,6 +1085,7 @@ unsigned long do_mmap(struct file *file, struct vm_area_struct *vma; struct vm_region *region; struct rb_node *rb; + vm_flags_t vm_flags; unsigned long capabilities, result; int ret; @@ -1104,7 +1104,7 @@ unsigned long do_mmap(struct file *file, /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ - vm_flags |= determine_vm_flags(file, prot, flags, capabilities); + vm_flags = determine_vm_flags(file, prot, flags, capabilities); /* we're going to need to record the mapping */ region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); @@ -1763,7 +1763,7 @@ EXPORT_SYMBOL_GPL(access_process_vm); * * Check the shared mappings on an inode on behalf of a shrinking truncate to * make sure that that any outstanding VMAs aren't broken and then shrink the - * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't + * vm_regions that extend that beyond so that do_mmap() doesn't * automatically grant mappings that are too large. */ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, diff --git a/mm/shmem.c b/mm/shmem.c index c5c281893bb8..eb6b36d89722 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4245,7 +4245,7 @@ EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); /** * shmem_zero_setup - setup a shared anonymous mapping - * @vma: the vma to be mmapped is prepared by do_mmap_pgoff + * @vma: the vma to be mmapped is prepared by do_mmap */ int shmem_zero_setup(struct vm_area_struct *vma) { diff --git a/mm/util.c b/mm/util.c index 8d6280c05238..5ef378a2a038 100644 --- a/mm/util.c +++ b/mm/util.c @@ -503,8 +503,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; - ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, - &populate, &uf); + ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate, + &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate) -- cgit v1.2.3