diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-14 10:10:55 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-14 10:10:55 -0700 |
commit | 318222a35bfb0ae9b5ff3e359a583463e6cfcd94 (patch) | |
tree | 6a8d921f7ac9915f3f12dd3fcd7efaaf6f16bb09 /fs | |
parent | 7e9890a3500d95c01511a4c45b7e7192dfa47ae2 (diff) | |
parent | 640be2d1ffbc1946f1547eb89b5005ed7542de99 (diff) | |
download | linux-318222a35bfb0ae9b5ff3e359a583463e6cfcd94.tar.bz2 |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
- a few misc things and hotfixes
- ocfs2
- almost all of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (139 commits)
kernel/memremap.c: remove the unused device_private_entry_fault() export
mm: delete find_get_entries_tag
mm/huge_memory.c: make __thp_get_unmapped_area static
mm/mprotect.c: fix compilation warning because of unused 'mm' variable
mm/page-writeback: introduce tracepoint for wait_on_page_writeback()
mm/vmscan: simplify trace_reclaim_flags and trace_shrink_flags
mm/Kconfig: update "Memory Model" help text
mm/vmscan.c: don't disable irq again when count pgrefill for memcg
mm: memblock: make keeping memblock memory opt-in rather than opt-out
hugetlbfs: always use address space in inode for resv_map pointer
mm/z3fold.c: support page migration
mm/z3fold.c: add structure for buddy handles
mm/z3fold.c: improve compression by extending search
mm/z3fold.c: introduce helper functions
mm/page_alloc.c: remove unnecessary parameter in rmqueue_pcplist
mm/hmm: add ARCH_HAS_HMM_MIRROR ARCH_HAS_HMM_DEVICE Kconfig
mm/vmscan.c: simplify shrink_inactive_list()
fs/sync.c: sync_file_range(2) may use WB_SYNC_ALL writeback
xen/privcmd-buf.c: convert to use vm_map_pages_zero()
xen/gntdev.c: convert to use vm_map_pages()
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dax.c | 8 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 18 | ||||
-rw-r--r-- | fs/io_uring.c | 5 | ||||
-rw-r--r-- | fs/ocfs2/dir.c | 20 | ||||
-rw-r--r-- | fs/ocfs2/export.c | 30 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 28 | ||||
-rw-r--r-- | fs/orangefs/orangefs-bufmap.c | 2 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 3 | ||||
-rw-r--r-- | fs/sync.c | 21 | ||||
-rw-r--r-- | fs/userfaultfd.c | 5 |
10 files changed, 74 insertions, 66 deletions
@@ -814,7 +814,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index, goto unlock_pmd; flush_cache_page(vma, address, pfn); - pmd = pmdp_huge_clear_flush(vma, address, pmdp); + pmd = pmdp_invalidate(vma, address, pmdp); pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); set_pmd_at(vma->vm_mm, address, pmdp, pmd); @@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, } trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); - result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, - write); + result = vmf_insert_pfn_pmd(vmf, pfn, write); break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: @@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); #ifdef CONFIG_FS_DAX_PMD else if (order == PMD_ORDER) - ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, - pfn, true); + ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE); #endif else ret = VM_FAULT_FALLBACK; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c74ef4426282..1dcc57189382 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -440,9 +440,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, u32 hash; index = page->index; - hash = hugetlb_fault_mutex_hash(h, current->mm, - &pseudo_vma, - mapping, index, 0); + hash = hugetlb_fault_mutex_hash(h, mapping, index, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); /* @@ -499,8 +497,15 @@ static void hugetlbfs_evict_inode(struct inode *inode) struct resv_map *resv_map; remove_inode_hugepages(inode, 0, LLONG_MAX); - resv_map = (struct resv_map *)inode->i_mapping->private_data; - /* root inode doesn't have the resv_map, so we should check it */ + + /* + * Get the resv_map from the address space embedded in the inode. + * This is the address space which points to any resv_map allocated + * at inode creation time. If this is a device special inode, + * i_mapping may not point to the original address space. + */ + resv_map = (struct resv_map *)(&inode->i_data)->private_data; + /* Only regular and link inodes have associated reserve maps */ if (resv_map) resv_map_release(&resv_map->refs); clear_inode(inode); @@ -639,8 +644,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, addr = index * hpage_size; /* mutex taken here, fault path and hole punch */ - hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, - index, addr); + hash = hugetlb_fault_mutex_hash(h, mapping, index, addr); mutex_lock(&hugetlb_fault_mutex_table[hash]); /* See if already present in mapping to avoid alloc/free */ diff --git a/fs/io_uring.c b/fs/io_uring.c index 48ea3977012a..fdc18321d70c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2697,8 +2697,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ret = 0; down_read(¤t->mm->mmap_sem); - pret = get_user_pages_longterm(ubuf, nr_pages, FOLL_WRITE, - pages, vmas); + pret = get_user_pages(ubuf, nr_pages, + FOLL_WRITE | FOLL_LONGTERM, + pages, vmas); if (pret == nr_pages) { /* don't support file backed memory */ for (j = 0; j < nr_pages; j++) { diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c121abbdfc7d..85f21caaa6ec 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -69,10 +69,6 @@ #define NAMEI_RA_BLOCKS 4 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) -static unsigned char ocfs2_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -}; - static int ocfs2_do_extend_dir(struct super_block *sb, handle_t *handle, struct inode *dir, @@ -1718,7 +1714,7 @@ int __ocfs2_add_entry(handle_t *handle, de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); de = de1; } - de->file_type = OCFS2_FT_UNKNOWN; + de->file_type = FT_UNKNOWN; if (blkno) { de->inode = cpu_to_le64(blkno); ocfs2_set_de_type(de, inode->i_mode); @@ -1803,13 +1799,9 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, } offset += le16_to_cpu(de->rec_len); if (le64_to_cpu(de->inode)) { - unsigned char d_type = DT_UNKNOWN; - - if (de->file_type < OCFS2_FT_MAX) - d_type = ocfs2_filetype_table[de->file_type]; - if (!dir_emit(ctx, de->name, de->name_len, - le64_to_cpu(de->inode), d_type)) + le64_to_cpu(de->inode), + fs_ftype_to_dtype(de->file_type))) goto out; } ctx->pos += le16_to_cpu(de->rec_len); @@ -1900,14 +1892,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, break; } if (le64_to_cpu(de->inode)) { - unsigned char d_type = DT_UNKNOWN; - - if (de->file_type < OCFS2_FT_MAX) - d_type = ocfs2_filetype_table[de->file_type]; if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), - d_type)) { + fs_ftype_to_dtype(de->file_type))) { brelse(bh); return 0; } diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 4bf8d5854b27..af2888d23de3 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) u64 blkno; struct dentry *parent; struct inode *dir = d_inode(child); + int set; trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); + status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1); + if (status < 0) { + mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); + parent = ERR_PTR(status); + goto bail; + } + status = ocfs2_inode_lock(dir, NULL, 0); if (status < 0) { if (status != -ENOENT) mlog_errno(status); parent = ERR_PTR(status); - goto bail; + goto unlock_nfs_sync; } status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno); @@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) goto bail_unlock; } + status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set); + if (status < 0) { + if (status == -EINVAL) { + status = -ESTALE; + } else + mlog(ML_ERROR, "test inode bit failed %d\n", status); + parent = ERR_PTR(status); + goto bail_unlock; + } + + trace_ocfs2_get_dentry_test_bit(status, set); + if (!set) { + status = -ESTALE; + parent = ERR_PTR(status); + goto bail_unlock; + } + parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); bail_unlock: ocfs2_inode_unlock(dir, 0); +unlock_nfs_sync: + ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1); + bail: trace_ocfs2_get_parent_end(parent); diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 7071ad0dec90..b86bf5e74348 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -392,21 +392,6 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { #define OCFS2_HB_GLOBAL "heartbeat=global" /* - * OCFS2 directory file types. Only the low 3 bits are used. The - * other bits are reserved for now. - */ -#define OCFS2_FT_UNKNOWN 0 -#define OCFS2_FT_REG_FILE 1 -#define OCFS2_FT_DIR 2 -#define OCFS2_FT_CHRDEV 3 -#define OCFS2_FT_BLKDEV 4 -#define OCFS2_FT_FIFO 5 -#define OCFS2_FT_SOCK 6 -#define OCFS2_FT_SYMLINK 7 - -#define OCFS2_FT_MAX 8 - -/* * OCFS2_DIR_PAD defines the directory entries boundaries * * NOTE: It must be a multiple of 4 @@ -424,17 +409,6 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { #define OCFS2_LINKS_HI_SHIFT 16 #define OCFS2_DX_ENTRIES_MAX (0xffffffffU) -#define S_SHIFT 12 -static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = OCFS2_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = OCFS2_FT_DIR, - [S_IFCHR >> S_SHIFT] = OCFS2_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = OCFS2_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = OCFS2_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = OCFS2_FT_SOCK, - [S_IFLNK >> S_SHIFT] = OCFS2_FT_SYMLINK, -}; - /* * Convenience casts @@ -1629,7 +1603,7 @@ static inline int ocfs2_sprintf_system_inode_name(char *buf, int len, static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de, umode_t mode) { - de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; + de->file_type = fs_umode_to_ftype(mode); } static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd) diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index d4811f981608..2bb916d68576 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -269,7 +269,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap, /* map the pages */ ret = get_user_pages_fast((unsigned long)user_desc->ptr, - bufmap->page_count, 1, bufmap->page_array); + bufmap->page_count, FOLL_WRITE, bufmap->page_array); if (ret < 0) return ret; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 95ca1fe7283c..01d4eb0e6bd1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1169,7 +1169,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, break; } - mmu_notifier_range_init(&range, mm, 0, -1UL); + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, + 0, NULL, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); diff --git a/fs/sync.c b/fs/sync.c index 01e82170545a..4d1ff010bc5a 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -292,8 +292,14 @@ int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, } if (flags & SYNC_FILE_RANGE_WRITE) { + int sync_mode = WB_SYNC_NONE; + + if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) == + SYNC_FILE_RANGE_WRITE_AND_WAIT) + sync_mode = WB_SYNC_ALL; + ret = __filemap_fdatawrite_range(mapping, offset, endbyte, - WB_SYNC_NONE); + sync_mode); if (ret < 0) goto out; } @@ -306,9 +312,9 @@ out: } /* - * sys_sync_file_range() permits finely controlled syncing over a segment of + * ksys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is - * zero then sys_sync_file_range() will operate from offset out to EOF. + * zero then ksys_sync_file_range() will operate from offset out to EOF. * * The flag bits are: * @@ -325,7 +331,7 @@ out: * Useful combinations of the flag bits are: * * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages - * in the range which were dirty on entry to sys_sync_file_range() are placed + * in the range which were dirty on entry to ksys_sync_file_range() are placed * under writeout. This is a start-write-for-data-integrity operation. * * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which @@ -337,10 +343,13 @@ out: * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait * for that operation to complete and to return the result. * - * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER: + * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER + * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT): * a traditional sync() operation. This is a write-for-data-integrity operation * which will ensure that all pages in the range which were dirty on entry to - * sys_sync_file_range() are committed to disk. + * ksys_sync_file_range() are written to disk. It should be noted that disk + * caches are not flushed by this call, so there are no guarantees here that the + * data will be available on disk after a crash. * * * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f5de1e726356..3b30301c90ec 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -30,6 +30,8 @@ #include <linux/security.h> #include <linux/hugetlb.h> +int sysctl_unprivileged_userfaultfd __read_mostly = 1; + static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; enum userfaultfd_state { @@ -1930,6 +1932,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) struct userfaultfd_ctx *ctx; int fd; + if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE)) + return -EPERM; + BUG_ON(!current->mm); /* Check the UFFD_* constants for consistency. */ |