summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-14 10:10:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-14 10:10:55 -0700
commit318222a35bfb0ae9b5ff3e359a583463e6cfcd94 (patch)
tree6a8d921f7ac9915f3f12dd3fcd7efaaf6f16bb09 /fs
parent7e9890a3500d95c01511a4c45b7e7192dfa47ae2 (diff)
parent640be2d1ffbc1946f1547eb89b5005ed7542de99 (diff)
downloadlinux-318222a35bfb0ae9b5ff3e359a583463e6cfcd94.tar.bz2
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: - a few misc things and hotfixes - ocfs2 - almost all of MM * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (139 commits) kernel/memremap.c: remove the unused device_private_entry_fault() export mm: delete find_get_entries_tag mm/huge_memory.c: make __thp_get_unmapped_area static mm/mprotect.c: fix compilation warning because of unused 'mm' variable mm/page-writeback: introduce tracepoint for wait_on_page_writeback() mm/vmscan: simplify trace_reclaim_flags and trace_shrink_flags mm/Kconfig: update "Memory Model" help text mm/vmscan.c: don't disable irq again when count pgrefill for memcg mm: memblock: make keeping memblock memory opt-in rather than opt-out hugetlbfs: always use address space in inode for resv_map pointer mm/z3fold.c: support page migration mm/z3fold.c: add structure for buddy handles mm/z3fold.c: improve compression by extending search mm/z3fold.c: introduce helper functions mm/page_alloc.c: remove unnecessary parameter in rmqueue_pcplist mm/hmm: add ARCH_HAS_HMM_MIRROR ARCH_HAS_HMM_DEVICE Kconfig mm/vmscan.c: simplify shrink_inactive_list() fs/sync.c: sync_file_range(2) may use WB_SYNC_ALL writeback xen/privcmd-buf.c: convert to use vm_map_pages_zero() xen/gntdev.c: convert to use vm_map_pages() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/dax.c8
-rw-r--r--fs/hugetlbfs/inode.c18
-rw-r--r--fs/io_uring.c5
-rw-r--r--fs/ocfs2/dir.c20
-rw-r--r--fs/ocfs2/export.c30
-rw-r--r--fs/ocfs2/ocfs2_fs.h28
-rw-r--r--fs/orangefs/orangefs-bufmap.c2
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/sync.c21
-rw-r--r--fs/userfaultfd.c5
10 files changed, 74 insertions, 66 deletions
diff --git a/fs/dax.c b/fs/dax.c
index e5e54da1715f..f74386293632 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -814,7 +814,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
- pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmdp_invalidate(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
@@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
}
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
- result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
- write);
+ result = vmf_insert_pfn_pmd(vmf, pfn, write);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
@@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
else if (order == PMD_ORDER)
- ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
- pfn, true);
+ ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
#endif
else
ret = VM_FAULT_FALLBACK;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c74ef4426282..1dcc57189382 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -440,9 +440,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
u32 hash;
index = page->index;
- hash = hugetlb_fault_mutex_hash(h, current->mm,
- &pseudo_vma,
- mapping, index, 0);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index, 0);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
@@ -499,8 +497,15 @@ static void hugetlbfs_evict_inode(struct inode *inode)
struct resv_map *resv_map;
remove_inode_hugepages(inode, 0, LLONG_MAX);
- resv_map = (struct resv_map *)inode->i_mapping->private_data;
- /* root inode doesn't have the resv_map, so we should check it */
+
+ /*
+ * Get the resv_map from the address space embedded in the inode.
+ * This is the address space which points to any resv_map allocated
+ * at inode creation time. If this is a device special inode,
+ * i_mapping may not point to the original address space.
+ */
+ resv_map = (struct resv_map *)(&inode->i_data)->private_data;
+ /* Only regular and link inodes have associated reserve maps */
if (resv_map)
resv_map_release(&resv_map->refs);
clear_inode(inode);
@@ -639,8 +644,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
- index, addr);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 48ea3977012a..fdc18321d70c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2697,8 +2697,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ret = 0;
down_read(&current->mm->mmap_sem);
- pret = get_user_pages_longterm(ubuf, nr_pages, FOLL_WRITE,
- pages, vmas);
+ pret = get_user_pages(ubuf, nr_pages,
+ FOLL_WRITE | FOLL_LONGTERM,
+ pages, vmas);
if (pret == nr_pages) {
/* don't support file backed memory */
for (j = 0; j < nr_pages; j++) {
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c121abbdfc7d..85f21caaa6ec 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -69,10 +69,6 @@
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-static unsigned char ocfs2_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
static int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle,
struct inode *dir,
@@ -1718,7 +1714,7 @@ int __ocfs2_add_entry(handle_t *handle,
de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
de = de1;
}
- de->file_type = OCFS2_FT_UNKNOWN;
+ de->file_type = FT_UNKNOWN;
if (blkno) {
de->inode = cpu_to_le64(blkno);
ocfs2_set_de_type(de, inode->i_mode);
@@ -1803,13 +1799,9 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
}
offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) {
- unsigned char d_type = DT_UNKNOWN;
-
- if (de->file_type < OCFS2_FT_MAX)
- d_type = ocfs2_filetype_table[de->file_type];
-
if (!dir_emit(ctx, de->name, de->name_len,
- le64_to_cpu(de->inode), d_type))
+ le64_to_cpu(de->inode),
+ fs_ftype_to_dtype(de->file_type)))
goto out;
}
ctx->pos += le16_to_cpu(de->rec_len);
@@ -1900,14 +1892,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
break;
}
if (le64_to_cpu(de->inode)) {
- unsigned char d_type = DT_UNKNOWN;
-
- if (de->file_type < OCFS2_FT_MAX)
- d_type = ocfs2_filetype_table[de->file_type];
if (!dir_emit(ctx, de->name,
de->name_len,
le64_to_cpu(de->inode),
- d_type)) {
+ fs_ftype_to_dtype(de->file_type))) {
brelse(bh);
return 0;
}
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 4bf8d5854b27..af2888d23de3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
u64 blkno;
struct dentry *parent;
struct inode *dir = d_inode(child);
+ int set;
trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno);
+ status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1);
+ if (status < 0) {
+ mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
+ parent = ERR_PTR(status);
+ goto bail;
+ }
+
status = ocfs2_inode_lock(dir, NULL, 0);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
parent = ERR_PTR(status);
- goto bail;
+ goto unlock_nfs_sync;
}
status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
@@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail_unlock;
}
+ status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set);
+ if (status < 0) {
+ if (status == -EINVAL) {
+ status = -ESTALE;
+ } else
+ mlog(ML_ERROR, "test inode bit failed %d\n", status);
+ parent = ERR_PTR(status);
+ goto bail_unlock;
+ }
+
+ trace_ocfs2_get_dentry_test_bit(status, set);
+ if (!set) {
+ status = -ESTALE;
+ parent = ERR_PTR(status);
+ goto bail_unlock;
+ }
+
parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
bail_unlock:
ocfs2_inode_unlock(dir, 0);
+unlock_nfs_sync:
+ ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1);
+
bail:
trace_ocfs2_get_parent_end(parent);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7071ad0dec90..b86bf5e74348 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -392,21 +392,6 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_HB_GLOBAL "heartbeat=global"
/*
- * OCFS2 directory file types. Only the low 3 bits are used. The
- * other bits are reserved for now.
- */
-#define OCFS2_FT_UNKNOWN 0
-#define OCFS2_FT_REG_FILE 1
-#define OCFS2_FT_DIR 2
-#define OCFS2_FT_CHRDEV 3
-#define OCFS2_FT_BLKDEV 4
-#define OCFS2_FT_FIFO 5
-#define OCFS2_FT_SOCK 6
-#define OCFS2_FT_SYMLINK 7
-
-#define OCFS2_FT_MAX 8
-
-/*
* OCFS2_DIR_PAD defines the directory entries boundaries
*
* NOTE: It must be a multiple of 4
@@ -424,17 +409,6 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_LINKS_HI_SHIFT 16
#define OCFS2_DX_ENTRIES_MAX (0xffffffffU)
-#define S_SHIFT 12
-static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = OCFS2_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = OCFS2_FT_DIR,
- [S_IFCHR >> S_SHIFT] = OCFS2_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = OCFS2_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = OCFS2_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = OCFS2_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = OCFS2_FT_SYMLINK,
-};
-
/*
* Convenience casts
@@ -1629,7 +1603,7 @@ static inline int ocfs2_sprintf_system_inode_name(char *buf, int len,
static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
umode_t mode)
{
- de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+ de->file_type = fs_umode_to_ftype(mode);
}
static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index d4811f981608..2bb916d68576 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -269,7 +269,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
/* map the pages */
ret = get_user_pages_fast((unsigned long)user_desc->ptr,
- bufmap->page_count, 1, bufmap->page_array);
+ bufmap->page_count, FOLL_WRITE, bufmap->page_array);
if (ret < 0)
return ret;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 95ca1fe7283c..01d4eb0e6bd1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1169,7 +1169,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
break;
}
- mmu_notifier_range_init(&range, mm, 0, -1UL);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, NULL, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
diff --git a/fs/sync.c b/fs/sync.c
index 01e82170545a..4d1ff010bc5a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -292,8 +292,14 @@ int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
}
if (flags & SYNC_FILE_RANGE_WRITE) {
+ int sync_mode = WB_SYNC_NONE;
+
+ if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
+ SYNC_FILE_RANGE_WRITE_AND_WAIT)
+ sync_mode = WB_SYNC_ALL;
+
ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
- WB_SYNC_NONE);
+ sync_mode);
if (ret < 0)
goto out;
}
@@ -306,9 +312,9 @@ out:
}
/*
- * sys_sync_file_range() permits finely controlled syncing over a segment of
+ * ksys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
- * zero then sys_sync_file_range() will operate from offset out to EOF.
+ * zero then ksys_sync_file_range() will operate from offset out to EOF.
*
* The flag bits are:
*
@@ -325,7 +331,7 @@ out:
* Useful combinations of the flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
- * in the range which were dirty on entry to sys_sync_file_range() are placed
+ * in the range which were dirty on entry to ksys_sync_file_range() are placed
* under writeout. This is a start-write-for-data-integrity operation.
*
* SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
@@ -337,10 +343,13 @@ out:
* earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
* for that operation to complete and to return the result.
*
- * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
+ * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
+ * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
* a traditional sync() operation. This is a write-for-data-integrity operation
* which will ensure that all pages in the range which were dirty on entry to
- * sys_sync_file_range() are committed to disk.
+ * ksys_sync_file_range() are written to disk. It should be noted that disk
+ * caches are not flushed by this call, so there are no guarantees here that the
+ * data will be available on disk after a crash.
*
*
* SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f5de1e726356..3b30301c90ec 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -30,6 +30,8 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
+int sysctl_unprivileged_userfaultfd __read_mostly = 1;
+
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
enum userfaultfd_state {
@@ -1930,6 +1932,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
struct userfaultfd_ctx *ctx;
int fd;
+ if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
+ return -EPERM;
+
BUG_ON(!current->mm);
/* Check the UFFD_* constants for consistency. */