Diffstat (limited to 'fs')
 fs/aio.c                               |  55
 fs/binfmt_elf_fdpic.c                  |   2
 fs/coda/upcall.c                       |   3
 fs/compat_ioctl.c                      | 123
 fs/coredump.c                          |   7
 fs/cramfs/Kconfig                      |  39
 fs/cramfs/README                       |  31
 fs/cramfs/inode.c                      | 511
 fs/ecryptfs/crypto.c                   |  44
 fs/ecryptfs/ecryptfs_kernel.h          |   9
 fs/ecryptfs/inode.c                    |   4
 fs/ecryptfs/keystore.c                 |  48
 fs/ecryptfs/main.c                     |   4
 fs/ecryptfs/messaging.c                |  13
 fs/ecryptfs/miscdev.c                  |   8
 fs/ecryptfs/mmap.c                     |   2
 fs/eventpoll.c                         |   4
 fs/ext4/ioctl.c                        |  86
 fs/fcntl.c                             |  16
 fs/fhandle.c                           |   4
 fs/file.c                              |  12
 fs/internal.h                          |   1
 fs/iomap.c                             |  24
 fs/namei.c                             |  14
 fs/nfs/cache_lib.c                     |   6
 fs/nfs/cache_lib.h                     |   2
 fs/nfs/callback.c                      |  14
 fs/nfs/callback_proc.c                 |   2
 fs/nfs/client.c                        |  10
 fs/nfs/delegation.c                    |  27
 fs/nfs/delegation.h                    |   1
 fs/nfs/dir.c                           |  50
 fs/nfs/file.c                          |  18
 fs/nfs/filelayout/filelayout.c         |  12
 fs/nfs/flexfilelayout/flexfilelayout.c |  20
 fs/nfs/flexfilelayout/flexfilelayout.h |   3
 fs/nfs/inode.c                         |  16
 fs/nfs/nfs3proc.c                      |  17
 fs/nfs/nfs4_fs.h                       |  12
 fs/nfs/nfs4client.c                    |  12
 fs/nfs/nfs4proc.c                      | 511
 fs/nfs/nfs4state.c                     |  53
 fs/nfs/nfs4trace.h                     |  26
 fs/nfs/nfs4xdr.c                       |  12
 fs/nfs/pnfs.c                          |  44
 fs/nfs/pnfs.h                          |  15
 fs/nfs/pnfs_nfs.c                      |  10
 fs/nfs/super.c                         |  14
 fs/nfs/write.c                         |  17
 fs/orangefs/orangefs-kernel.h          |   6
 fs/overlayfs/copy_up.c                 |   8
 fs/overlayfs/dir.c                     |  25
 fs/overlayfs/inode.c                   |  63
 fs/overlayfs/namei.c                   |  59
 fs/overlayfs/overlayfs.h               |  13
 fs/overlayfs/ovl_entry.h               |  14
 fs/overlayfs/readdir.c                 |  55
 fs/overlayfs/super.c                   | 688
 fs/overlayfs/util.c                    |  21
 fs/proc/cpuinfo.c                      |   6
 fs/pstore/platform.c                   |   2
 fs/read_write.c                        |  21
 fs/select.c                            |  68
 fs/signalfd.c                          |   4
 fs/statfs.c                            |   2
 fs/super.c                             |  46
 fs/xfs/libxfs/xfs_iext_tree.c          |   2
 fs/xfs/libxfs/xfs_log_format.h         |   4
 fs/xfs/xfs_inode.c                     |   1
 69 files changed, 1886 insertions(+), 1210 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index e6de7715228c..a062d75109cb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1297,20 +1297,10 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
static long read_events(struct kioctx *ctx, long min_nr, long nr,
struct io_event __user *event,
- struct timespec __user *timeout)
+ ktime_t until)
{
- ktime_t until = KTIME_MAX;
long ret = 0;
- if (timeout) {
- struct timespec ts;
-
- if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
- return -EFAULT;
-
- until = timespec_to_ktime(ts);
- }
-
/*
* Note that aio_read_events() is being called as the conditional - i.e.
* we're calling it after prepare_to_wait() has set task state to
@@ -1826,6 +1816,25 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return ret;
}
+static long do_io_getevents(aio_context_t ctx_id,
+ long min_nr,
+ long nr,
+ struct io_event __user *events,
+ struct timespec64 *ts)
+{
+ ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
+ struct kioctx *ioctx = lookup_ioctx(ctx_id);
+ long ret = -EINVAL;
+
+ if (likely(ioctx)) {
+ if (likely(min_nr <= nr && min_nr >= 0))
+ ret = read_events(ioctx, min_nr, nr, events, until);
+ percpu_ref_put(&ioctx->users);
+ }
+
+ return ret;
+}
+
/* io_getevents:
* Attempts to read at least min_nr events and up to nr events from
* the completion queue for the aio_context specified by ctx_id. If
@@ -1844,15 +1853,14 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct io_event __user *, events,
struct timespec __user *, timeout)
{
- struct kioctx *ioctx = lookup_ioctx(ctx_id);
- long ret = -EINVAL;
+ struct timespec64 ts;
- if (likely(ioctx)) {
- if (likely(min_nr <= nr && min_nr >= 0))
- ret = read_events(ioctx, min_nr, nr, events, timeout);
- percpu_ref_put(&ioctx->users);
+ if (timeout) {
+ if (unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
}
- return ret;
+
+ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
}
#ifdef CONFIG_COMPAT
@@ -1862,17 +1870,14 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct io_event __user *, events,
struct compat_timespec __user *, timeout)
{
- struct timespec t;
- struct timespec __user *ut = NULL;
+ struct timespec64 t;
if (timeout) {
- if (compat_get_timespec(&t, timeout))
+ if (compat_get_timespec64(&t, timeout))
return -EFAULT;
- ut = compat_alloc_user_space(sizeof(*ut));
- if (copy_to_user(ut, &t, sizeof(t)))
- return -EFAULT;
}
- return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
+
+ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
}
#endif
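
A minimal user-space sketch (not part of the patch) that exercises the
io_getevents() timeout path reworked above. It uses raw syscall numbers so
no libaio is needed, and it submits no iocbs, so the call simply waits out
the one-second timeout:

	#include <linux/aio_abi.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		aio_context_t ctx = 0;
		struct io_event events[8];
		struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
		long n;

		if (syscall(__NR_io_setup, 8, &ctx))	/* create an aio context */
			return 1;
		/* Nothing submitted: this blocks until the timeout expires. */
		n = syscall(__NR_io_getevents, ctx, 1, 8, events, &ts);
		printf("io_getevents returned %ld\n", n);
		syscall(__NR_io_destroy, ctx);
		return 0;
	}
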
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5429b035e249..429326b6e2e7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1498,7 +1498,9 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
struct vm_area_struct *vma;
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
+#ifdef CONFIG_MMU
unsigned long addr;
+#endif
if (!maydump(vma, cprm->mm_flags))
continue;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index a37f003530d7..1175a1722411 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -447,8 +447,7 @@ int venus_fsync(struct super_block *sb, struct CodaFid *fid)
UPARG(CODA_FSYNC);
inp->coda_fsync.VFid = *fid;
- error = coda_upcall(coda_vcp(sb), sizeof(union inputArgs),
- &outsize, inp);
+ error = coda_upcall(coda_vcp(sb), insize, &outsize, inp);
CODA_FREE(inp, insize);
return error;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index bd5d91e119ca..5fc5dc660600 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -54,8 +54,6 @@
#include <linux/if_tun.h>
#include <linux/ctype.h>
#include <linux/syscalls.h>
-#include <linux/i2c.h>
-#include <linux/i2c-dev.h>
#include <linux/atalk.h>
#include <linux/gfp.h>
#include <linux/cec.h>
@@ -137,22 +135,6 @@ static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return vfs_ioctl(file, cmd, arg);
}
-static int w_long(struct file *file,
- unsigned int cmd, compat_ulong_t __user *argp)
-{
- int err;
- unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
-
- if (valp == NULL)
- return -EFAULT;
- err = do_ioctl(file, cmd, (unsigned long)valp);
- if (err)
- return err;
- if (convert_in_user(valp, argp))
- return -EFAULT;
- return 0;
-}
-
struct compat_video_event {
int32_t type;
compat_time_t timestamp;
@@ -671,96 +653,6 @@ static int serial_struct_ioctl(struct file *file,
return err;
}
-/*
- * I2C layer ioctls
- */
-
-struct i2c_msg32 {
- u16 addr;
- u16 flags;
- u16 len;
- compat_caddr_t buf;
-};
-
-struct i2c_rdwr_ioctl_data32 {
- compat_caddr_t msgs; /* struct i2c_msg __user *msgs */
- u32 nmsgs;
-};
-
-struct i2c_smbus_ioctl_data32 {
- u8 read_write;
- u8 command;
- u32 size;
- compat_caddr_t data; /* union i2c_smbus_data *data */
-};
-
-struct i2c_rdwr_aligned {
- struct i2c_rdwr_ioctl_data cmd;
- struct i2c_msg msgs[0];
-};
-
-static int do_i2c_rdwr_ioctl(struct file *file,
- unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata)
-{
- struct i2c_rdwr_aligned __user *tdata;
- struct i2c_msg __user *tmsgs;
- struct i2c_msg32 __user *umsgs;
- compat_caddr_t datap;
- u32 nmsgs;
- int i;
-
- if (get_user(nmsgs, &udata->nmsgs))
- return -EFAULT;
- if (nmsgs > I2C_RDWR_IOCTL_MAX_MSGS)
- return -EINVAL;
-
- if (get_user(datap, &udata->msgs))
- return -EFAULT;
- umsgs = compat_ptr(datap);
-
- tdata = compat_alloc_user_space(sizeof(*tdata) +
- nmsgs * sizeof(struct i2c_msg));
- tmsgs = &tdata->msgs[0];
-
- if (put_user(nmsgs, &tdata->cmd.nmsgs) ||
- put_user(tmsgs, &tdata->cmd.msgs))
- return -EFAULT;
-
- for (i = 0; i < nmsgs; i++) {
- if (copy_in_user(&tmsgs[i].addr, &umsgs[i].addr, 3*sizeof(u16)))
- return -EFAULT;
- if (get_user(datap, &umsgs[i].buf) ||
- put_user(compat_ptr(datap), &tmsgs[i].buf))
- return -EFAULT;
- }
- return do_ioctl(file, cmd, (unsigned long)tdata);
-}
-
-static int do_i2c_smbus_ioctl(struct file *file,
- unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata)
-{
- struct i2c_smbus_ioctl_data __user *tdata;
- union {
- /* beginnings of those have identical layouts */
- struct i2c_smbus_ioctl_data32 data32;
- struct i2c_smbus_ioctl_data data;
- } v;
-
- tdata = compat_alloc_user_space(sizeof(*tdata));
- if (tdata == NULL)
- return -ENOMEM;
-
- memset(&v, 0, sizeof(v));
- if (copy_from_user(&v.data32, udata, sizeof(v.data32)))
- return -EFAULT;
- v.data.data = compat_ptr(v.data32.data);
-
- if (copy_to_user(tdata, &v.data, sizeof(v.data)))
- return -EFAULT;
-
- return do_ioctl(file, cmd, (unsigned long)tdata);
-}
-
#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t)
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
@@ -1283,13 +1175,6 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
-/* i2c */
-COMPATIBLE_IOCTL(I2C_SLAVE)
-COMPATIBLE_IOCTL(I2C_SLAVE_FORCE)
-COMPATIBLE_IOCTL(I2C_TENBIT)
-COMPATIBLE_IOCTL(I2C_PEC)
-COMPATIBLE_IOCTL(I2C_RETRIES)
-COMPATIBLE_IOCTL(I2C_TIMEOUT)
/* hiddev */
COMPATIBLE_IOCTL(HIDIOCGVERSION)
COMPATIBLE_IOCTL(HIDIOCAPPLICATION)
@@ -1464,13 +1349,6 @@ static long do_ioctl_trans(unsigned int cmd,
case TIOCGSERIAL:
case TIOCSSERIAL:
return serial_struct_ioctl(file, cmd, argp);
- /* i2c */
- case I2C_FUNCS:
- return w_long(file, cmd, argp);
- case I2C_RDWR:
- return do_i2c_rdwr_ioctl(file, cmd, argp);
- case I2C_SMBUS:
- return do_i2c_smbus_ioctl(file, cmd, argp);
/* Not implemented in the native kernel */
case RTC_IRQP_READ32:
case RTC_IRQP_SET32:
@@ -1580,6 +1458,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
case FICLONE:
case FICLONERANGE:
case FIDEDUPERANGE:
+ case FS_IOC_FIEMAP:
goto do_ioctl;
case FIBMAP:
diff --git a/fs/coredump.c b/fs/coredump.c
index 52c63d6c9143..1e2c87acac9b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -680,16 +680,11 @@ void do_coredump(const siginfo_t *siginfo)
* privs and don't want to unlink another user's coredump.
*/
if (!need_suid_safe) {
- mm_segment_t old_fs;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
/*
* If it doesn't exist, that's fine. If there's some
* other problem, we'll catch it at the filp_open().
*/
- (void) sys_unlink((const char __user *)cn.corename);
- set_fs(old_fs);
+ do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
}
/*
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index 11b29d491b7c..f937082f3244 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -1,6 +1,5 @@
config CRAMFS
- tristate "Compressed ROM file system support (cramfs) (OBSOLETE)"
- depends on BLOCK
+ tristate "Compressed ROM file system support (cramfs)"
select ZLIB_INFLATE
help
Saying Y here includes support for CramFs (Compressed ROM File
@@ -16,7 +15,39 @@ config CRAMFS
cramfs. Note that the root file system (the one containing the
directory /) cannot be compiled as a module.
- This filesystem is obsoleted by SquashFS, which is much better
- in terms of performance and features.
+ This filesystem is limited in capabilities and performance on
+ purpose to remain small and low on RAM usage. It is most suitable
+ for small embedded systems. If you have ample RAM to spare, you may
+	  consider a more capable compressed filesystem such as SquashFS,
+	  which is much better in terms of performance and features.
+
+ If unsure, say N.
+
+config CRAMFS_BLOCKDEV
+ bool "Support CramFs image over a regular block device" if EXPERT
+ depends on CRAMFS && BLOCK
+ default y
+ help
+ This option allows the CramFs driver to load data from a regular
+	  block device such as a disk partition or a ramdisk.
+
+config CRAMFS_MTD
+ bool "Support CramFs image directly mapped in physical memory"
+ depends on CRAMFS && MTD
+ default y if !CRAMFS_BLOCKDEV
+ help
+ This option allows the CramFs driver to load data directly from
+	  a linearly addressed memory range (usually non-volatile memory
+ like flash) instead of going through the block device layer.
+ This saves some memory since no intermediate buffering is
+ necessary.
+
+	  The location of the CramFs image is determined by an
+	  MTD device capable of direct memory mapping, e.g. from
+ the 'physmap' map driver or a resulting MTD partition.
+ For example, this would mount the cramfs image stored in
+ the MTD partition named "xip_fs" on the /mnt mountpoint:
+
+ mount -t cramfs mtd:xip_fs /mnt
If unsure, say N.
diff --git a/fs/cramfs/README b/fs/cramfs/README
index 9d4e7ea311f4..d71b27e0ff15 100644
--- a/fs/cramfs/README
+++ b/fs/cramfs/README
@@ -49,17 +49,46 @@ same as the start of the (i+1)'th <block> if there is one). The first
<block> immediately follows the last <block_pointer> for the file.
<block_pointer>s are each 32 bits long.
+When the CRAMFS_FLAG_EXT_BLOCK_POINTERS capability bit is set, each
+<block_pointer>'s top bits may contain special flags as follows:
+
+CRAMFS_BLK_FLAG_UNCOMPRESSED (bit 31):
+ The block data is not compressed and should be copied verbatim.
+
+CRAMFS_BLK_FLAG_DIRECT_PTR (bit 30):
+ The <block_pointer> stores the actual block start offset and not
+ its end, shifted right by 2 bits. The block must therefore be
+  aligned to a 4-byte boundary. The block size is blksize if
+  CRAMFS_BLK_FLAG_UNCOMPRESSED is also set; otherwise the compressed
+  data length is stored in the first 2 bytes of the block data. This
+  is used to allow discontiguous data layout
+ and specific data block alignments e.g. for XIP applications.
+
+
The order of <file_data>'s is a depth-first descent of the directory
tree, i.e. the same order as `find -size +0 \( -type f -o -type l \)
-print'.
<block>: The i'th <block> is the output of zlib's compress function
-applied to the i'th blksize-sized chunk of the input data.
+applied to the i'th blksize-sized chunk of the input data if the
+corresponding CRAMFS_BLK_FLAG_UNCOMPRESSED <block_ptr> bit is not set,
+otherwise it is the input data directly.
(For the last <block> of the file, the input may of course be smaller.)
Each <block> may be a different size. (See <block_pointer> above.)
+
<block>s are merely byte-aligned, not generally u32-aligned.
+When CRAMFS_BLK_FLAG_DIRECT_PTR is specified then the corresponding
+<block> may be located anywhere and not necessarily contiguous with
+the previous/next blocks. In that case it is minimally u32-aligned.
+If CRAMFS_BLK_FLAG_UNCOMPRESSED is also specified then the size is always
+blksize except for the last block which is limited by the file length.
+If CRAMFS_BLK_FLAG_DIRECT_PTR is set and CRAMFS_BLK_FLAG_UNCOMPRESSED
+is not set then the first 2 bytes of the block contain the size of the
+remaining block data as this cannot be determined from the placement of
+logically adjacent blocks.
+
Holes
-----
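
To make the extended pointer encoding concrete, here is an illustrative
sketch (not from the patch; kernel context assumed) that classifies a
single <block_pointer> using the CRAMFS_BLK_* constants the inode.c
changes below rely on:

	#include <linux/cramfs_fs.h>	/* CRAMFS_BLK_* definitions */
	#include <linux/printk.h>

	/* Illustrative only: decode one extended cramfs block pointer. */
	static void example_decode_blockptr(u32 blockptr)
	{
		bool uncompressed = blockptr & CRAMFS_BLK_FLAG_UNCOMPRESSED;
		u32 val = blockptr & ~CRAMFS_BLK_FLAGS;

		if (blockptr & CRAMFS_BLK_FLAG_DIRECT_PTR) {
			/* val is the block start offset, shifted right 2 bits */
			pr_debug("direct block at %u (%scompressed)\n",
				 val << CRAMFS_BLK_DIRECT_PTR_SHIFT,
				 uncompressed ? "un" : "");
		} else {
			/* val is one past the end of this block's data */
			pr_debug("conventional block ending at %u (%scompressed)\n",
				 val, uncompressed ? "un" : "");
		}
	}
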
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 7919967488cb..9a2ab419ba62 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -15,10 +15,15 @@
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/file.h>
#include <linux/pagemap.h>
+#include <linux/pfn_t.h>
+#include <linux/ramfs.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/blkdev.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/super.h>
#include <linux/slab.h>
#include <linux/vfs.h>
#include <linux/mutex.h>
@@ -36,6 +41,9 @@ struct cramfs_sb_info {
unsigned long blocks;
unsigned long files;
unsigned long flags;
+ void *linear_virt_addr;
+ resource_size_t linear_phys_addr;
+ size_t mtd_point_size;
};
static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb)
@@ -46,6 +54,7 @@ static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb)
static const struct super_operations cramfs_ops;
static const struct inode_operations cramfs_dir_inode_operations;
static const struct file_operations cramfs_directory_operations;
+static const struct file_operations cramfs_physmem_fops;
static const struct address_space_operations cramfs_aops;
static DEFINE_MUTEX(read_mutex);
@@ -93,6 +102,10 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
case S_IFREG:
inode->i_fop = &generic_ro_fops;
inode->i_data.a_ops = &cramfs_aops;
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) &&
+ CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS &&
+ CRAMFS_SB(sb)->linear_phys_addr)
+ inode->i_fop = &cramfs_physmem_fops;
break;
case S_IFDIR:
inode->i_op = &cramfs_dir_inode_operations;
@@ -140,6 +153,9 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
* BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to
* worry about end-of-buffer issues even when decompressing a full
* page cache.
+ *
+ * Note: This is all optimized away at compile time when
+ * CONFIG_CRAMFS_BLOCKDEV=n.
*/
#define READ_BUFFERS (2)
/* NEXT_BUFFER(): Loop over [0..(READ_BUFFERS-1)]. */
@@ -160,10 +176,10 @@ static struct super_block *buffer_dev[READ_BUFFERS];
static int next_buffer;
/*
- * Returns a pointer to a buffer containing at least LEN bytes of
- * filesystem starting at byte offset OFFSET into the filesystem.
+ * Populate our block cache and return a pointer to it.
*/
-static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len)
+static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
+ unsigned int len)
{
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct page *pages[BLKS_PER_BUF];
@@ -239,11 +255,250 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
return read_buffers[buffer] + offset;
}
+/*
+ * Return a pointer to the linearly addressed cramfs image in memory.
+ */
+static void *cramfs_direct_read(struct super_block *sb, unsigned int offset,
+ unsigned int len)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
+ if (!len)
+ return NULL;
+ if (len > sbi->size || offset > sbi->size - len)
+ return page_address(ZERO_PAGE(0));
+ return sbi->linear_virt_addr + offset;
+}
+
+/*
+ * Returns a pointer to a buffer containing at least LEN bytes of
+ * filesystem starting at byte offset OFFSET into the filesystem.
+ */
+static void *cramfs_read(struct super_block *sb, unsigned int offset,
+ unsigned int len)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sbi->linear_virt_addr)
+ return cramfs_direct_read(sb, offset, len);
+ else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV))
+ return cramfs_blkdev_read(sb, offset, len);
+ else
+ return NULL;
+}
+
+/*
+ * For a mapping to be possible, we need a range of uncompressed and
+ * contiguous blocks. Return the offset for the first block and number of
+ * valid blocks for which that is true, or zero otherwise.
+ */
+static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
+ int i;
+ u32 *blockptrs, first_block_addr;
+
+ /*
+ * We can dereference memory directly here as this code may be
+ * reached only when there is a direct filesystem image mapping
+ * available in memory.
+ */
+ blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4);
+ first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS;
+ i = 0;
+ do {
+ u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT);
+ u32 expect = (first_block_addr + block_off) |
+ CRAMFS_BLK_FLAG_DIRECT_PTR |
+ CRAMFS_BLK_FLAG_UNCOMPRESSED;
+ if (blockptrs[i] != expect) {
+ pr_debug("range: block %d/%d got %#x expects %#x\n",
+ pgoff+i, pgoff + *pages - 1,
+ blockptrs[i], expect);
+ if (i == 0)
+ return 0;
+ break;
+ }
+ } while (++i < *pages);
+
+ *pages = i;
+ return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
+}
+
+#ifdef CONFIG_MMU
+
+/*
+ * Return true if the last page of a file in the filesystem image contains
+ * some other data that doesn't belong to that file. It is assumed that the
+ * last block is CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED
+ * (verified by cramfs_get_block_range() and directly accessible in memory.
+ */
+static bool cramfs_last_page_is_shared(struct inode *inode)
+{
+ struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
+ u32 partial, last_page, blockaddr, *blockptrs;
+ char *tail_data;
+
+ partial = offset_in_page(inode->i_size);
+ if (!partial)
+ return false;
+ last_page = inode->i_size >> PAGE_SHIFT;
+ blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode));
+ blockaddr = blockptrs[last_page] & ~CRAMFS_BLK_FLAGS;
+ blockaddr <<= CRAMFS_BLK_DIRECT_PTR_SHIFT;
+ tail_data = sbi->linear_virt_addr + blockaddr + partial;
+ return memchr_inv(tail_data, 0, PAGE_SIZE - partial) ? true : false;
+}
+
+static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *inode = file_inode(file);
+ struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb);
+ unsigned int pages, max_pages, offset;
+ unsigned long address, pgoff = vma->vm_pgoff;
+ char *bailout_reason;
+ int ret;
+
+ ret = generic_file_readonly_mmap(file, vma);
+ if (ret)
+ return ret;
+
+ /*
+ * Now try to pre-populate ptes for this vma with a direct
+ * mapping avoiding memory allocation when possible.
+ */
+
+ /* Could COW work here? */
+ bailout_reason = "vma is writable";
+ if (vma->vm_flags & VM_WRITE)
+ goto bailout;
+
+ max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ bailout_reason = "beyond file limit";
+ if (pgoff >= max_pages)
+ goto bailout;
+ pages = min(vma_pages(vma), max_pages - pgoff);
+
+ offset = cramfs_get_block_range(inode, pgoff, &pages);
+ bailout_reason = "unsuitable block layout";
+ if (!offset)
+ goto bailout;
+ address = sbi->linear_phys_addr + offset;
+ bailout_reason = "data is not page aligned";
+ if (!PAGE_ALIGNED(address))
+ goto bailout;
+
+ /* Don't map the last page if it contains some other data */
+ if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) {
+ pr_debug("mmap: %s: last page is shared\n",
+ file_dentry(file)->d_name.name);
+ pages--;
+ }
+
+ if (!pages) {
+ bailout_reason = "no suitable block remaining";
+ goto bailout;
+ }
+
+ if (pages == vma_pages(vma)) {
+ /*
+ * The entire vma is mappable. remap_pfn_range() will
+ * make it distinguishable from a non-direct mapping
+ * in /proc/<pid>/maps by substituting the file offset
+ * with the actual physical address.
+ */
+ ret = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
+ pages * PAGE_SIZE, vma->vm_page_prot);
+ } else {
+ /*
+ * Let's create a mixed map if we can't map it all.
+ * The normal paging machinery will take care of the
+ * unpopulated ptes via cramfs_readpage().
+ */
+ int i;
+ vma->vm_flags |= VM_MIXEDMAP;
+ for (i = 0; i < pages && !ret; i++) {
+ unsigned long off = i * PAGE_SIZE;
+ pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV);
+ ret = vm_insert_mixed(vma, vma->vm_start + off, pfn);
+ }
+ }
+
+ if (!ret)
+ pr_debug("mapped %s[%lu] at 0x%08lx (%u/%lu pages) "
+ "to vma 0x%08lx, page_prot 0x%llx\n",
+ file_dentry(file)->d_name.name, pgoff,
+ address, pages, vma_pages(vma), vma->vm_start,
+ (unsigned long long)pgprot_val(vma->vm_page_prot));
+ return ret;
+
+bailout:
+ pr_debug("%s[%lu]: direct mmap impossible: %s\n",
+ file_dentry(file)->d_name.name, pgoff, bailout_reason);
+ /* Didn't manage any direct map, but normal paging is still possible */
+ return 0;
+}
+
+#else /* CONFIG_MMU */
+
+static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
+}
+
+static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+ unsigned int pages, block_pages, max_pages, offset;
+
+ pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (pgoff >= max_pages || pages > max_pages - pgoff)
+ return -EINVAL;
+ block_pages = pages;
+ offset = cramfs_get_block_range(inode, pgoff, &block_pages);
+ if (!offset || block_pages != pages)
+ return -ENOSYS;
+ addr = sbi->linear_phys_addr + offset;
+ pr_debug("get_unmapped for %s ofs %#lx siz %lu at 0x%08lx\n",
+ file_dentry(file)->d_name.name, pgoff*PAGE_SIZE, len, addr);
+ return addr;
+}
+
+static unsigned int cramfs_physmem_mmap_capabilities(struct file *file)
+{
+ return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT |
+ NOMMU_MAP_READ | NOMMU_MAP_EXEC;
+}
+
+#endif /* CONFIG_MMU */
+
+static const struct file_operations cramfs_physmem_fops = {
+ .llseek = generic_file_llseek,
+ .read_iter = generic_file_read_iter,
+ .splice_read = generic_file_splice_read,
+ .mmap = cramfs_physmem_mmap,
+#ifndef CONFIG_MMU
+ .get_unmapped_area = cramfs_physmem_get_unmapped_area,
+ .mmap_capabilities = cramfs_physmem_mmap_capabilities,
+#endif
+};
+
static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
- kill_block_super(sb);
+	if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
+ if (sbi && sbi->mtd_point_size)
+ mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
+ kill_mtd_super(sb);
+ } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
+ kill_block_super(sb);
+ }
kfree(sbi);
}
@@ -254,34 +509,24 @@ static int cramfs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
+static int cramfs_read_super(struct super_block *sb,
+ struct cramfs_super *super, int silent)
{
- int i;
- struct cramfs_super super;
+ struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
unsigned long root_offset;
- struct cramfs_sb_info *sbi;
- struct inode *root;
- sb->s_flags |= MS_RDONLY;
-
- sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
- sb->s_fs_info = sbi;
-
- /* Invalidate the read buffers on mount: think disk change.. */
- mutex_lock(&read_mutex);
- for (i = 0; i < READ_BUFFERS; i++)
- buffer_blocknr[i] = -1;
+ /* We don't know the real size yet */
+ sbi->size = PAGE_SIZE;
/* Read the first block and get the superblock from it */
- memcpy(&super, cramfs_read(sb, 0, sizeof(super)), sizeof(super));
+ mutex_lock(&read_mutex);
+ memcpy(super, cramfs_read(sb, 0, sizeof(*super)), sizeof(*super));
mutex_unlock(&read_mutex);
/* Do sanity checks on the superblock */
- if (super.magic != CRAMFS_MAGIC) {
+ if (super->magic != CRAMFS_MAGIC) {
/* check for wrong endianness */
- if (super.magic == CRAMFS_MAGIC_WEND) {
+ if (super->magic == CRAMFS_MAGIC_WEND) {
if (!silent)
pr_err("wrong endianness\n");
return -EINVAL;
@@ -289,10 +534,12 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
/* check at 512 byte offset */
mutex_lock(&read_mutex);
- memcpy(&super, cramfs_read(sb, 512, sizeof(super)), sizeof(super));
+ memcpy(super,
+ cramfs_read(sb, 512, sizeof(*super)),
+ sizeof(*super));
mutex_unlock(&read_mutex);
- if (super.magic != CRAMFS_MAGIC) {
- if (super.magic == CRAMFS_MAGIC_WEND && !silent)
+ if (super->magic != CRAMFS_MAGIC) {
+ if (super->magic == CRAMFS_MAGIC_WEND && !silent)
pr_err("wrong endianness\n");
else if (!silent)
pr_err("wrong magic\n");
@@ -301,34 +548,34 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
}
/* get feature flags first */
- if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
+ if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) {
pr_err("unsupported filesystem features\n");
return -EINVAL;
}
/* Check that the root inode is in a sane state */
- if (!S_ISDIR(super.root.mode)) {
+ if (!S_ISDIR(super->root.mode)) {
pr_err("root is not a directory\n");
return -EINVAL;
}
/* correct strange, hard-coded permissions of mkcramfs */
- super.root.mode |= (S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
+ super->root.mode |= 0555;
- root_offset = super.root.offset << 2;
- if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
- sbi->size = super.size;
- sbi->blocks = super.fsid.blocks;
- sbi->files = super.fsid.files;
+ root_offset = super->root.offset << 2;
+ if (super->flags & CRAMFS_FLAG_FSID_VERSION_2) {
+ sbi->size = super->size;
+ sbi->blocks = super->fsid.blocks;
+ sbi->files = super->fsid.files;
} else {
sbi->size = 1<<28;
sbi->blocks = 0;
sbi->files = 0;
}
- sbi->magic = super.magic;
- sbi->flags = super.flags;
+ sbi->magic = super->magic;
+ sbi->flags = super->flags;
if (root_offset == 0)
pr_info("empty filesystem");
- else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
+ else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
((root_offset != sizeof(struct cramfs_super)) &&
(root_offset != 512 + sizeof(struct cramfs_super))))
{
@@ -336,9 +583,18 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
return -EINVAL;
}
+ return 0;
+}
+
+static int cramfs_finalize_super(struct super_block *sb,
+ struct cramfs_inode *cramfs_root)
+{
+ struct inode *root;
+
/* Set it all up.. */
+ sb->s_flags |= MS_RDONLY;
sb->s_op = &cramfs_ops;
- root = get_cramfs_inode(sb, &super.root, 0);
+ root = get_cramfs_inode(sb, cramfs_root, 0);
if (IS_ERR(root))
return PTR_ERR(root);
sb->s_root = d_make_root(root);
@@ -347,10 +603,79 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
+static int cramfs_blkdev_fill_super(struct super_block *sb, void *data,
+ int silent)
+{
+ struct cramfs_sb_info *sbi;
+ struct cramfs_super super;
+ int i, err;
+
+ sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+ sb->s_fs_info = sbi;
+
+ /* Invalidate the read buffers on mount: think disk change.. */
+ for (i = 0; i < READ_BUFFERS; i++)
+ buffer_blocknr[i] = -1;
+
+ err = cramfs_read_super(sb, &super, silent);
+ if (err)
+ return err;
+ return cramfs_finalize_super(sb, &super.root);
+}
+
+static int cramfs_mtd_fill_super(struct super_block *sb, void *data,
+ int silent)
+{
+ struct cramfs_sb_info *sbi;
+ struct cramfs_super super;
+ int err;
+
+ sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+ sb->s_fs_info = sbi;
+
+ /* Map only one page for now. Will remap it when fs size is known. */
+ err = mtd_point(sb->s_mtd, 0, PAGE_SIZE, &sbi->mtd_point_size,
+ &sbi->linear_virt_addr, &sbi->linear_phys_addr);
+ if (err || sbi->mtd_point_size != PAGE_SIZE) {
+ pr_err("unable to get direct memory access to mtd:%s\n",
+ sb->s_mtd->name);
+ return err ? : -ENODATA;
+ }
+
+ pr_info("checking physical address %pap for linear cramfs image\n",
+ &sbi->linear_phys_addr);
+ err = cramfs_read_super(sb, &super, silent);
+ if (err)
+ return err;
+
+ /* Remap the whole filesystem now */
+ pr_info("linear cramfs image on mtd:%s appears to be %lu KB in size\n",
+ sb->s_mtd->name, sbi->size/1024);
+ mtd_unpoint(sb->s_mtd, 0, PAGE_SIZE);
+ err = mtd_point(sb->s_mtd, 0, sbi->size, &sbi->mtd_point_size,
+ &sbi->linear_virt_addr, &sbi->linear_phys_addr);
+ if (err || sbi->mtd_point_size != sbi->size) {
+ pr_err("unable to get direct memory access to mtd:%s\n",
+ sb->s_mtd->name);
+ return err ? : -ENODATA;
+ }
+
+ return cramfs_finalize_super(sb, &super.root);
+}
+
static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ u64 id = 0;
+
+ if (sb->s_bdev)
+ id = huge_encode_dev(sb->s_bdev->bd_dev);
+ else if (sb->s_dev)
+ id = huge_encode_dev(sb->s_dev);
buf->f_type = CRAMFS_MAGIC;
buf->f_bsize = PAGE_SIZE;
@@ -502,34 +827,86 @@ static int cramfs_readpage(struct file *file, struct page *page)
if (page->index < maxblock) {
struct super_block *sb = inode->i_sb;
- u32 blkptr_offset = OFFSET(inode) + page->index*4;
- u32 start_offset, compr_len;
+ u32 blkptr_offset = OFFSET(inode) + page->index * 4;
+ u32 block_ptr, block_start, block_len;
+ bool uncompressed, direct;
- start_offset = OFFSET(inode) + maxblock*4;
mutex_lock(&read_mutex);
- if (page->index)
- start_offset = *(u32 *) cramfs_read(sb, blkptr_offset-4,
- 4);
- compr_len = (*(u32 *) cramfs_read(sb, blkptr_offset, 4) -
- start_offset);
- mutex_unlock(&read_mutex);
+ block_ptr = *(u32 *) cramfs_read(sb, blkptr_offset, 4);
+ uncompressed = (block_ptr & CRAMFS_BLK_FLAG_UNCOMPRESSED);
+ direct = (block_ptr & CRAMFS_BLK_FLAG_DIRECT_PTR);
+ block_ptr &= ~CRAMFS_BLK_FLAGS;
+
+ if (direct) {
+ /*
+ * The block pointer is an absolute start pointer,
+ * shifted by 2 bits. The size is included in the
+ * first 2 bytes of the data block when compressed,
+ * or PAGE_SIZE otherwise.
+ */
+ block_start = block_ptr << CRAMFS_BLK_DIRECT_PTR_SHIFT;
+ if (uncompressed) {
+ block_len = PAGE_SIZE;
+ /* if last block: cap to file length */
+ if (page->index == maxblock - 1)
+ block_len =
+ offset_in_page(inode->i_size);
+ } else {
+ block_len = *(u16 *)
+ cramfs_read(sb, block_start, 2);
+ block_start += 2;
+ }
+ } else {
+ /*
+ * The block pointer indicates one past the end of
+ * the current block (start of next block). If this
+ * is the first block then it starts where the block
+ * pointer table ends, otherwise its start comes
+ * from the previous block's pointer.
+ */
+ block_start = OFFSET(inode) + maxblock * 4;
+ if (page->index)
+ block_start = *(u32 *)
+ cramfs_read(sb, blkptr_offset - 4, 4);
+ /* Beware... previous ptr might be a direct ptr */
+ if (unlikely(block_start & CRAMFS_BLK_FLAG_DIRECT_PTR)) {
+ /* See comments on earlier code. */
+ u32 prev_start = block_start;
+ block_start = prev_start & ~CRAMFS_BLK_FLAGS;
+ block_start <<= CRAMFS_BLK_DIRECT_PTR_SHIFT;
+ if (prev_start & CRAMFS_BLK_FLAG_UNCOMPRESSED) {
+ block_start += PAGE_SIZE;
+ } else {
+ block_len = *(u16 *)
+ cramfs_read(sb, block_start, 2);
+ block_start += 2 + block_len;
+ }
+ }
+ block_start &= ~CRAMFS_BLK_FLAGS;
+ block_len = block_ptr - block_start;
+ }
- if (compr_len == 0)
+ if (block_len == 0)
; /* hole */
- else if (unlikely(compr_len > (PAGE_SIZE << 1))) {
- pr_err("bad compressed blocksize %u\n",
- compr_len);
+ else if (unlikely(block_len > 2*PAGE_SIZE ||
+ (uncompressed && block_len > PAGE_SIZE))) {
+ mutex_unlock(&read_mutex);
+ pr_err("bad data blocksize %u\n", block_len);
goto err;
+ } else if (uncompressed) {
+ memcpy(pgdata,
+ cramfs_read(sb, block_start, block_len),
+ block_len);
+ bytes_filled = block_len;
} else {
- mutex_lock(&read_mutex);
bytes_filled = cramfs_uncompress_block(pgdata,
PAGE_SIZE,
- cramfs_read(sb, start_offset, compr_len),
- compr_len);
- mutex_unlock(&read_mutex);
- if (unlikely(bytes_filled < 0))
- goto err;
+ cramfs_read(sb, block_start, block_len),
+ block_len);
}
+ mutex_unlock(&read_mutex);
+ if (unlikely(bytes_filled < 0))
+ goto err;
}
memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
@@ -573,10 +950,22 @@ static const struct super_operations cramfs_ops = {
.statfs = cramfs_statfs,
};
-static struct dentry *cramfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static struct dentry *cramfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
+ struct dentry *ret = ERR_PTR(-ENOPROTOOPT);
+
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD)) {
+ ret = mount_mtd(fs_type, flags, dev_name, data,
+ cramfs_mtd_fill_super);
+ if (!IS_ERR(ret))
+ return ret;
+ }
+ if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) {
+ ret = mount_bdev(fs_type, flags, dev_name, data,
+ cramfs_blkdev_fill_super);
+ }
+ return ret;
}
static struct file_system_type cramfs_fs_type = {
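
For reference, a user-space sketch (not part of the patch; the path is
hypothetical) of the direct-mapping path added above: a read-only private
mmap of a file on a cramfs image lets cramfs_physmem_mmap() pre-populate
the ptes straight from flash when the file's blocks are uncompressed,
contiguous, and page aligned:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/xip_app", O_RDONLY);	/* file on a cramfs mount */
		char *p;

		if (fd < 0)
			return 1;
		p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED)
			return 1;
		printf("first byte: %#x\n", (unsigned char)p[0]);
		munmap(p, 4096);
		close(fd);
		return 0;
	}
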
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e5e29f8c920b..846ca150d52e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -36,27 +36,13 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
+#include <linux/kernel.h>
#include "ecryptfs_kernel.h"
#define DECRYPT 0
#define ENCRYPT 1
/**
- * ecryptfs_to_hex
- * @dst: Buffer to take hex character representation of contents of
- * src; must be at least of size (src_size * 2)
- * @src: Buffer to be converted to a hex string representation
- * @src_size: number of bytes to convert
- */
-void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
-{
- int x;
-
- for (x = 0; x < src_size; x++)
- sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]);
-}
-
-/**
* ecryptfs_from_hex
* @dst: Buffer to take the bytes from src hex; must be at least of
* size (src_size / 2)
@@ -899,8 +885,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
u32 flags;
flags = get_unaligned_be32(page_virt);
- for (i = 0; i < ((sizeof(ecryptfs_flag_map)
- / sizeof(struct ecryptfs_flag_map_elem))); i++)
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++)
if (flags & ecryptfs_flag_map[i].file_flag) {
crypt_stat->flags |= ecryptfs_flag_map[i].local_flag;
} else
@@ -937,8 +922,7 @@ void ecryptfs_write_crypt_stat_flags(char *page_virt,
u32 flags = 0;
int i;
- for (i = 0; i < ((sizeof(ecryptfs_flag_map)
- / sizeof(struct ecryptfs_flag_map_elem))); i++)
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++)
if (crypt_stat->flags & ecryptfs_flag_map[i].local_flag)
flags |= ecryptfs_flag_map[i].file_flag;
/* Version is in top 8 bits of the 32-bit flag vector */
@@ -1434,8 +1418,6 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER);
if (!page_virt) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Unable to allocate page_virt\n",
- __func__);
goto out;
}
rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
@@ -1522,9 +1504,6 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
filename->encrypted_filename =
kmalloc(filename->encrypted_filename_size, GFP_KERNEL);
if (!filename->encrypted_filename) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc [%zd] bytes\n", __func__,
- filename->encrypted_filename_size);
rc = -ENOMEM;
goto out;
}
@@ -1669,12 +1648,10 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
BUG_ON(!mutex_is_locked(&key_tfm_list_mutex));
tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL);
- if (key_tfm != NULL)
+ if (key_tfm)
(*key_tfm) = tmp_tfm;
if (!tmp_tfm) {
rc = -ENOMEM;
- printk(KERN_ERR "Error attempting to allocate from "
- "ecryptfs_key_tfm_cache\n");
goto out;
}
mutex_init(&tmp_tfm->key_tfm_mutex);
@@ -1690,7 +1667,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
"cipher with name = [%s]; rc = [%d]\n",
tmp_tfm->cipher_name, rc);
kmem_cache_free(ecryptfs_key_tfm_cache, tmp_tfm);
- if (key_tfm != NULL)
+ if (key_tfm)
(*key_tfm) = NULL;
goto out;
}
@@ -1881,7 +1858,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
size_t src_byte_offset = 0;
size_t dst_byte_offset = 0;
- if (dst == NULL) {
+ if (!dst) {
(*dst_size) = ecryptfs_max_decoded_size(src_size);
goto out;
}
@@ -1949,9 +1926,6 @@ int ecryptfs_encrypt_and_encode_filename(
filename = kzalloc(sizeof(*filename), GFP_KERNEL);
if (!filename) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kzalloc [%zd] bytes\n", __func__,
- sizeof(*filename));
rc = -ENOMEM;
goto out;
}
@@ -1980,9 +1954,6 @@ int ecryptfs_encrypt_and_encode_filename(
+ encoded_name_no_prefix_size);
(*encoded_name) = kmalloc((*encoded_name_size) + 1, GFP_KERNEL);
if (!(*encoded_name)) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kzalloc [%zd] bytes\n", __func__,
- (*encoded_name_size));
rc = -ENOMEM;
kfree(filename->encrypted_filename);
kfree(filename);
@@ -2064,9 +2035,6 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
name, name_size);
decoded_name = kmalloc(decoded_name_size, GFP_KERNEL);
if (!decoded_name) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc [%zd] bytes\n", __func__,
- decoded_name_size);
rc = -ENOMEM;
goto out;
}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 3fbc0ff79699..e74cb2a0b299 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -31,6 +31,7 @@
#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
+#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/fs_stack.h>
#include <linux/namei.h>
@@ -51,7 +52,13 @@
#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
-extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
+static inline void
+ecryptfs_to_hex(char *dst, char *src, size_t src_size)
+{
+ char *end = bin2hex(dst, src, src_size);
+ *end = '\0';
+}
+
extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
struct ecryptfs_key_record {
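
For illustration (not part of the patch): bin2hex() emits two hex digits
per input byte and returns a pointer just past the last digit, so with the
added NUL store the destination must hold src_size * 2 + 1 bytes, e.g.:

	u8 raw[3] = { 0xde, 0xad, 0xbe };
	char hex[2 * sizeof(raw) + 1];

	ecryptfs_to_hex(hex, (char *)raw, sizeof(raw));
	/* hex now holds "deadbe" */
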
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index efc2db42d175..847904aa63a9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -64,7 +64,6 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque)
/* i_size will be overwritten for encrypted regular files */
fsstack_copy_inode_size(inode, lower_inode);
inode->i_ino = lower_inode->i_ino;
- inode->i_version++;
inode->i_mapping->a_ops = &ecryptfs_aops;
if (S_ISLNK(inode->i_mode))
@@ -334,9 +333,6 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
if (!dentry_info) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to allocate ecryptfs_dentry_info struct\n",
- __func__);
dput(lower_dentry);
return ERR_PTR(-ENOMEM);
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index fa218cd64f74..c89a58cfc991 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -639,11 +639,9 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
int rc = 0;
s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s) {
- printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
- "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
+ if (!s)
return -ENOMEM;
- }
+
(*packet_size) = 0;
rc = ecryptfs_find_auth_tok_for_sig(
&auth_tok_key,
@@ -687,7 +685,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
* separator, and then the filename */
s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE
+ s->block_aligned_filename_size);
- if (dest == NULL) {
+ if (!dest) {
(*packet_size) = s->max_packet_size;
goto out_unlock;
}
@@ -714,9 +712,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
s->block_aligned_filename = kzalloc(s->block_aligned_filename_size,
GFP_KERNEL);
if (!s->block_aligned_filename) {
- printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
- "kzalloc [%zd] bytes\n", __func__,
- s->block_aligned_filename_size);
rc = -ENOMEM;
goto out_unlock;
}
@@ -769,10 +764,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
s->hash_desc = kmalloc(sizeof(*s->hash_desc) +
crypto_shash_descsize(s->hash_tfm), GFP_KERNEL);
if (!s->hash_desc) {
- printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
- "kmalloc [%zd] bytes\n", __func__,
- sizeof(*s->hash_desc) +
- crypto_shash_descsize(s->hash_tfm));
rc = -ENOMEM;
goto out_release_free_unlock;
}
@@ -925,11 +916,9 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
(*filename_size) = 0;
(*filename) = NULL;
s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s) {
- printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
- "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
+ if (!s)
return -ENOMEM;
- }
+
if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
"at least [%d]\n", __func__, max_packet_size,
@@ -1015,9 +1004,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
s->decrypted_filename = kmalloc(s->block_aligned_filename_size,
GFP_KERNEL);
if (!s->decrypted_filename) {
- printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc [%zd] bytes\n", __func__,
- s->block_aligned_filename_size);
rc = -ENOMEM;
goto out_unlock;
}
@@ -1097,9 +1083,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
}
(*filename) = kmalloc(((*filename_size) + 1), GFP_KERNEL);
if (!(*filename)) {
- printk(KERN_ERR "%s: Out of memory whilst attempting to "
- "kmalloc [%zd] bytes\n", __func__,
- ((*filename_size) + 1));
rc = -ENOMEM;
goto out_free_unlock;
}
@@ -1333,7 +1316,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
if ((*new_auth_tok)->session_key.encrypted_key_size
> ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
printk(KERN_WARNING "Tag 1 packet contains key larger "
- "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
+ "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
rc = -EINVAL;
goto out;
}
@@ -2525,11 +2508,9 @@ int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
struct ecryptfs_key_sig *new_key_sig;
new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL);
- if (!new_key_sig) {
- printk(KERN_ERR
- "Error allocating from ecryptfs_key_sig_cache\n");
+ if (!new_key_sig)
return -ENOMEM;
- }
+
memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
new_key_sig->keysig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
/* Caller must hold keysig_list_mutex */
@@ -2545,16 +2526,12 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
char *sig, u32 global_auth_tok_flags)
{
struct ecryptfs_global_auth_tok *new_auth_tok;
- int rc = 0;
new_auth_tok = kmem_cache_zalloc(ecryptfs_global_auth_tok_cache,
GFP_KERNEL);
- if (!new_auth_tok) {
- rc = -ENOMEM;
- printk(KERN_ERR "Error allocating from "
- "ecryptfs_global_auth_tok_cache\n");
- goto out;
- }
+ if (!new_auth_tok)
+ return -ENOMEM;
+
memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
new_auth_tok->flags = global_auth_tok_flags;
new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
@@ -2562,7 +2539,6 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
list_add(&new_auth_tok->mount_crypt_stat_list,
&mount_crypt_stat->global_auth_tok_list);
mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
-out:
- return rc;
+ return 0;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 25aeaa7328ba..f2677c90d96e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -426,7 +426,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
mount_crypt_stat->global_default_cipher_key_size);
if (!cipher_code) {
ecryptfs_printk(KERN_ERR,
- "eCryptfs doesn't support cipher: %s",
+ "eCryptfs doesn't support cipher: %s\n",
mount_crypt_stat->global_default_cipher_name);
rc = -EINVAL;
goto out;
@@ -781,7 +781,7 @@ static struct attribute *attributes[] = {
NULL,
};
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = attributes,
};
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 286f10b0363b..9fdd5bcf4564 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -147,8 +147,6 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file)
(*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
if (!(*daemon)) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
- "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
goto out;
}
(*daemon)->file = file;
@@ -250,8 +248,6 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon,
msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL);
if (!msg_ctx->msg) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
- "GFP_KERNEL memory\n", __func__, msg_size);
goto unlock;
}
msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE;
@@ -386,7 +382,6 @@ int __init ecryptfs_init_messaging(void)
GFP_KERNEL);
if (!ecryptfs_daemon_hash) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
mutex_unlock(&ecryptfs_daemon_hash_mux);
goto out;
}
@@ -398,7 +393,6 @@ int __init ecryptfs_init_messaging(void)
GFP_KERNEL);
if (!ecryptfs_msg_ctx_arr) {
rc = -ENOMEM;
- printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
goto out;
}
mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -442,15 +436,16 @@ void ecryptfs_release_messaging(void)
}
if (ecryptfs_daemon_hash) {
struct ecryptfs_daemon *daemon;
+ struct hlist_node *n;
int i;
mutex_lock(&ecryptfs_daemon_hash_mux);
for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
- hlist_for_each_entry(daemon,
- &ecryptfs_daemon_hash[i],
- euid_chain) {
+ hlist_for_each_entry_safe(daemon, n,
+ &ecryptfs_daemon_hash[i],
+ euid_chain) {
rc = ecryptfs_exorcise_daemon(daemon);
if (rc)
printk(KERN_ERR "%s: Error whilst "
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index e4141f257495..f09cacaf8c80 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -163,12 +163,8 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
struct ecryptfs_message *msg;
msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
- if (!msg) {
- printk(KERN_ERR "%s: Out of memory whilst attempting "
- "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
- (sizeof(*msg) + data_size));
+ if (!msg)
return -ENOMEM;
- }
mutex_lock(&msg_ctx->mux);
msg_ctx->msg = msg;
@@ -383,7 +379,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
goto memdup;
} else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
printk(KERN_WARNING "%s: Acceptable packet size range is "
- "[%d-%zu], but amount of data written is [%zu].",
+ "[%d-%zu], but amount of data written is [%zu].\n",
__func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
return -EINVAL;
}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 1f0c471b4ba3..cdf358b209d9 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -431,8 +431,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
}
xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL);
if (!xattr_virt) {
- printk(KERN_ERR "Out of memory whilst attempting to write "
- "inode size to xattr\n");
rc = -ENOMEM;
goto out;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e048144f17c..afd548ebc328 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2239,7 +2239,6 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
compat_size_t, sigsetsize)
{
long err;
- compat_sigset_t csigmask;
sigset_t ksigmask, sigsaved;
/*
@@ -2249,9 +2248,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
+ if (get_compat_sigset(&ksigmask, sigmask))
return -EFAULT;
- sigset_from_compat(&ksigmask, &csigmask);
sigsaved = current->blocked;
set_current_blocked(&ksigmask);
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b7558f292420..1eec25014f62 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -592,6 +592,44 @@ static int ext4_ioc_getfsmap(struct super_block *sb,
return 0;
}
+static long ext4_ioctl_group_add(struct file *file,
+ struct ext4_new_group_data *input)
+{
+ struct super_block *sb = file_inode(file)->i_sb;
+	int err, err2 = 0;
+
+ err = ext4_resize_begin(sb);
+ if (err)
+ return err;
+
+ if (ext4_has_feature_bigalloc(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online resizing not supported with bigalloc");
+ err = -EOPNOTSUPP;
+ goto group_add_out;
+ }
+
+ err = mnt_want_write_file(file);
+ if (err)
+ goto group_add_out;
+
+ err = ext4_group_add(sb, input);
+ if (EXT4_SB(sb)->s_journal) {
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ }
+ if (err == 0)
+ err = err2;
+ mnt_drop_write_file(file);
+ if (!err && ext4_has_group_desc_csum(sb) &&
+ test_opt(sb, INIT_INODE_TABLE))
+ err = ext4_register_li_request(sb, input->group);
+group_add_out:
+ ext4_resize_end(sb);
+ return err;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -776,44 +814,12 @@ mext_out:
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
- int err, err2=0;
-
- err = ext4_resize_begin(sb);
- if (err)
- return err;
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
- sizeof(input))) {
- err = -EFAULT;
- goto group_add_out;
- }
-
- if (ext4_has_feature_bigalloc(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not supported with bigalloc");
- err = -EOPNOTSUPP;
- goto group_add_out;
- }
-
- err = mnt_want_write_file(filp);
- if (err)
- goto group_add_out;
+ sizeof(input)))
+ return -EFAULT;
- err = ext4_group_add(sb, &input);
- if (EXT4_SB(sb)->s_journal) {
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- }
- if (err == 0)
- err = err2;
- mnt_drop_write_file(filp);
- if (!err && ext4_has_group_desc_csum(sb) &&
- test_opt(sb, INIT_INODE_TABLE))
- err = ext4_register_li_request(sb, input.group);
-group_add_out:
- ext4_resize_end(sb);
- return err;
+ return ext4_ioctl_group_add(filp, &input);
}
case EXT4_IOC_MIGRATE:
@@ -1078,8 +1084,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
case EXT4_IOC32_GROUP_ADD: {
struct compat_ext4_new_group_input __user *uinput;
- struct ext4_new_group_input input;
- mm_segment_t old_fs;
+ struct ext4_new_group_data input;
int err;
uinput = compat_ptr(arg);
@@ -1092,12 +1097,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
&uinput->reserved_blocks);
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
- (unsigned long) &input);
- set_fs(old_fs);
- return err;
+ return ext4_ioctl_group_add(file, &input);
}
case EXT4_IOC_MOVE_EXT:
case EXT4_IOC_RESIZE_FS:
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 30f47d0f74a0..0522e283a4f4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -563,6 +563,9 @@ static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __u
{
struct compat_flock64 fl;
+ BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
+ BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
+
memset(&fl, 0, sizeof(struct compat_flock64));
copy_flock_fields(&fl, kfl);
if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
@@ -632,9 +635,8 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
if (err)
break;
err = fixup_compat_flock(&flock);
- if (err)
- return err;
- err = put_compat_flock(&flock, compat_ptr(arg));
+ if (!err)
+ err = put_compat_flock(&flock, compat_ptr(arg));
break;
case F_GETLK64:
case F_OFD_GETLK:
@@ -642,12 +644,8 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
if (err)
break;
err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
- if (err)
- break;
- err = fixup_compat_flock(&flock);
- if (err)
- return err;
- err = put_compat_flock64(&flock, compat_ptr(arg));
+ if (!err)
+ err = put_compat_flock64(&flock, compat_ptr(arg));
break;
case F_SETLK:
case F_SETLKW:
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 474adc8d2a3a..0ace128f5d23 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -213,8 +213,8 @@ out_err:
return retval;
}
-long do_handle_open(int mountdirfd,
- struct file_handle __user *ufh, int open_flag)
+static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
+ int open_flag)
{
long retval = 0;
struct path path;
diff --git a/fs/file.c b/fs/file.c
index 4eecbf4244a5..3b080834b870 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -593,13 +593,16 @@ void __fd_install(struct files_struct *files, unsigned int fd,
{
struct fdtable *fdt;
- might_sleep();
rcu_read_lock_sched();
- while (unlikely(files->resize_in_progress)) {
+ if (unlikely(files->resize_in_progress)) {
rcu_read_unlock_sched();
- wait_event(files->resize_wait, !files->resize_in_progress);
- rcu_read_lock_sched();
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ BUG_ON(fdt->fd[fd] != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
+ spin_unlock(&files->file_lock);
+ return;
}
/* coupled with smp_wmb() in expand_fdtable() */
smp_rmb();
@@ -632,7 +635,6 @@ int __close_fd(struct files_struct *files, unsigned fd)
if (!file)
goto out_unlock;
rcu_assign_pointer(fdt->fd[fd], NULL);
- __clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
return filp_close(file, files);
diff --git a/fs/internal.h b/fs/internal.h
index 48cee21b4f14..df262f41a0ef 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,6 +55,7 @@ extern void __init chrdev_init(void);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
+long do_unlinkat(int dfd, struct filename *name);
/*
* namespace.c
diff --git a/fs/iomap.c b/fs/iomap.c
index b9f74803e56c..47d29ccffaef 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -856,6 +856,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
struct bio *bio;
bool need_zeroout = false;
int nr_pages, ret;
+ size_t copied = 0;
if ((pos | length | align) & ((1 << blkbits) - 1))
return -EINVAL;
@@ -867,7 +868,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
/*FALLTHRU*/
case IOMAP_UNWRITTEN:
if (!(dio->flags & IOMAP_DIO_WRITE)) {
- iov_iter_zero(length, dio->submit.iter);
+ length = iov_iter_zero(length, dio->submit.iter);
dio->size += length;
return length;
}
@@ -904,8 +905,11 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
}
do {
- if (dio->error)
+ size_t n;
+ if (dio->error) {
+ iov_iter_revert(dio->submit.iter, copied);
return 0;
+ }
bio = bio_alloc(GFP_KERNEL, nr_pages);
bio_set_dev(bio, iomap->bdev);
@@ -918,20 +922,24 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
ret = bio_iov_iter_get_pages(bio, &iter);
if (unlikely(ret)) {
bio_put(bio);
- return ret;
+ return copied ? copied : ret;
}
+ n = bio->bi_iter.bi_size;
if (dio->flags & IOMAP_DIO_WRITE) {
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
- task_io_account_write(bio->bi_iter.bi_size);
+ task_io_account_write(n);
} else {
bio_set_op_attrs(bio, REQ_OP_READ, 0);
if (dio->flags & IOMAP_DIO_DIRTY)
bio_set_pages_dirty(bio);
}
- dio->size += bio->bi_iter.bi_size;
- pos += bio->bi_iter.bi_size;
+ iov_iter_advance(dio->submit.iter, n);
+
+ dio->size += n;
+ pos += n;
+ copied += n;
nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
@@ -947,9 +955,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
if (pad)
iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
}
-
- iov_iter_advance(dio->submit.iter, length);
- return length;
+ return copied;
}
ssize_t
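
The reworked iomap_dio_actor() accounts progress as it goes: it honours the short count returned by iov_iter_zero(), advances the iterator per bio, and on a late error either reverts the iterator (the dio->error case: the caller sees no progress at all) or returns the bytes already mapped (copied ? copied : ret). A standalone sketch of that accounting discipline, with an invented cursor type standing in for the iov_iter:

    /*
     * Progress accounting sketch: an invented cursor stands in for the
     * iov_iter.  Advance per chunk; on a late error either revert (the
     * caller sees no progress) or report the partial byte count.
     */
    #include <stddef.h>
    #include <stdio.h>

    struct cursor { const char *p; size_t left; };

    static size_t advance(struct cursor *c, size_t n)
    {
        if (n > c->left)
            n = c->left;
        c->p += n;
        c->left -= n;
        return n;
    }

    static void revert(struct cursor *c, size_t n)
    {
        c->p -= n;
        c->left += n;
    }

    /* Consume up to len bytes in 16-byte chunks; fail_at injects an
     * error once that many bytes have been consumed. */
    static long actor(struct cursor *c, size_t len, size_t fail_at)
    {
        size_t copied = 0;

        while (copied < len && c->left) {
            if (copied >= fail_at) {
                revert(c, copied);   /* like iov_iter_revert() on dio->error */
                return 0;
            }
            copied += advance(c, 16);
        }
        return (long)copied;         /* partial progress is still progress */
    }

    int main(void)
    {
        static const char buf[256];
        struct cursor a = { buf, sizeof(buf) };
        struct cursor b = { buf, sizeof(buf) };

        printf("%ld\n", actor(&a, 64, 1000)); /* 64: no error injected */
        printf("%ld\n", actor(&b, 64, 32));   /* 0: error, iter reverted */
        return 0;
    }
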
diff --git a/fs/namei.c b/fs/namei.c
index 5424b10cfdc4..f0c7a7b9b6ca 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3459,7 +3459,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
goto out;
child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
error = PTR_ERR(child);
- if (unlikely(IS_ERR(child)))
+ if (IS_ERR(child))
goto out2;
dput(path.dentry);
path.dentry = child;
@@ -4010,10 +4010,9 @@ EXPORT_SYMBOL(vfs_unlink);
* writeout happening, and we don't want to prevent access to the directory
* while waiting on the I/O.
*/
-static long do_unlinkat(int dfd, const char __user *pathname)
+long do_unlinkat(int dfd, struct filename *name)
{
int error;
- struct filename *name;
struct dentry *dentry;
struct path path;
struct qstr last;
@@ -4022,8 +4021,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, getname(pathname), lookup_flags,
- &path, &last, &type);
+ name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -4065,12 +4063,12 @@ exit2:
mnt_drop_write(path.mnt);
exit1:
path_put(&path);
- putname(name);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
inode = NULL;
goto retry;
}
+ putname(name);
return error;
slashes:
@@ -4091,12 +4089,12 @@ SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
if (flag & AT_REMOVEDIR)
return do_rmdir(dfd, pathname);
- return do_unlinkat(dfd, pathname);
+ return do_unlinkat(dfd, getname(pathname));
}
SYSCALL_DEFINE1(unlink, const char __user *, pathname)
{
- return do_unlinkat(AT_FDCWD, pathname);
+ return do_unlinkat(AT_FDCWD, getname(pathname));
}
int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
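
do_unlinkat() now takes the struct filename from its callers, which do the getname() at the syscall boundary, and putname() moves below the retry_estale() check: the name has to stay alive across a LOOKUP_REVAL retry instead of being re-read from userspace on each pass. The lifetime shape, as a small self-contained sketch:

    /* Retry loop that reuses one allocation across attempts and
     * releases it only once no further retry is possible. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int try_unlink(const char *name, int attempt)
    {
        (void)name;                     /* unused in the sketch */
        return attempt == 0 ? -1 : 0;   /* first attempt goes stale */
    }

    static int do_unlink(char *name)
    {
        int attempt = 0, err;
    retry:
        err = try_unlink(name, attempt);
        if (err && attempt++ == 0)
            goto retry;  /* name is still valid on the retry pass */
        free(name);      /* freed once, after the last attempt */
        return err;
    }

    int main(void)
    {
        return do_unlink(strdup("victim.txt"));
    }
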
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index b60627bcfc62..ef6729568432 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -67,7 +67,7 @@ out:
*/
void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
{
- if (atomic_dec_and_test(&dreq->count))
+ if (refcount_dec_and_test(&dreq->count))
kfree(dreq);
}
@@ -87,7 +87,7 @@ static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
dreq = container_of(req, struct nfs_cache_defer_req, req);
dreq->deferred_req.revisit = nfs_dns_cache_revisit;
- atomic_inc(&dreq->count);
+ refcount_inc(&dreq->count);
return &dreq->deferred_req;
}
@@ -99,7 +99,7 @@ struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
if (dreq) {
init_completion(&dreq->completion);
- atomic_set(&dreq->count, 1);
+ refcount_set(&dreq->count, 1);
dreq->req.defer = nfs_dns_cache_defer;
}
return dreq;
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 4e6236a86cf7..220ee409abc4 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -16,7 +16,7 @@ struct nfs_cache_defer_req {
struct cache_req req;
struct cache_deferred_req deferred_req;
struct completion completion;
- atomic_t count;
+ refcount_t count;
};
extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
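
The atomic_t to refcount_t conversions threaded through this series (dreq->count here, clp->cl_count, the lock and open context counts, the flexfiles mirror refs) swap a plain wrapping counter for a type that saturates on overflow and can refuse an increment from zero, turning refcount bugs into warnings rather than use-after-free. A userspace miniature of those two semantics; this is a demonstration of the idea, not the kernel implementation:

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { atomic_uint refs; } refcount_t;

    static void refcount_set(refcount_t *r, unsigned int n)
    {
        atomic_store(&r->refs, n);
    }

    /* Refuse to take a reference on an object already going away. */
    static bool refcount_inc_not_zero(refcount_t *r)
    {
        unsigned int old = atomic_load(&r->refs);
        while (old != 0)
            if (atomic_compare_exchange_weak(&r->refs, &old, old + 1))
                return true;
        return false;
    }

    /* Returns true when the caller dropped the final reference. */
    static bool refcount_dec_and_test(refcount_t *r)
    {
        unsigned int old = atomic_fetch_sub(&r->refs, 1);
        assert(old != 0);   /* underflow is a bug, not a wrap */
        return old == 1;
    }

    int main(void)
    {
        refcount_t r;
        refcount_set(&r, 1);
        printf("got ref:   %d\n", refcount_inc_not_zero(&r)); /* 1 */
        refcount_dec_and_test(&r);
        printf("last put:  %d\n", refcount_dec_and_test(&r)); /* 1 */
        printf("resurrect: %d\n", refcount_inc_not_zero(&r)); /* 0 */
        return 0;
    }
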
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cd9d992feb2e..509dc5adeb8f 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -49,15 +49,15 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
if (ret <= 0)
goto out_err;
nn->nfs_callback_tcpport = ret;
- dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
- nn->nfs_callback_tcpport, PF_INET, net);
+ dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
+ nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
- dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
- nn->nfs_callback_tcpport6, PF_INET6, net);
+ dprintk("NFS: Callback listener port = %u (af %u, net %x\n",
+ nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum);
} else if (ret != -EAFNOSUPPORT)
goto out_err;
return 0;
@@ -185,7 +185,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
if (--nn->cb_users[minorversion])
return;
- dprintk("NFS: destroy per-net callback data; net=%p\n", net);
+ dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
svc_shutdown_net(serv, net);
}
@@ -198,7 +198,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
if (nn->cb_users[minorversion]++)
return 0;
- dprintk("NFS: create per-net callback data; net=%p\n", net);
+ dprintk("NFS: create per-net callback data; net=%x\n", net->ns.inum);
ret = svc_bind(serv, net);
if (ret < 0) {
@@ -223,7 +223,7 @@ err_socks:
err_bind:
nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
- "net = %p\n", ret, net);
+ "net = %x\n", ret, net->ns.inum);
return ret;
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 19151f6c0e97..2435af56b87e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -440,7 +440,7 @@ static bool referring_call_exists(struct nfs_client *clp,
uint32_t nrclists,
struct referring_call_list *rclists)
{
- bool status = 0;
+ bool status = false;
int i, j;
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 22880ef6d8dd..0ac2fb1c6b63 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -163,7 +163,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
- atomic_set(&clp->cl_count, 1);
+ refcount_set(&clp->cl_count, 1);
clp->cl_cons_state = NFS_CS_INITING;
memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
@@ -269,7 +269,7 @@ void nfs_put_client(struct nfs_client *clp)
nn = net_generic(clp->cl_net, nfs_net_id);
- if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
+ if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
list_del(&clp->cl_share_link);
nfs_cb_idr_remove_locked(clp);
spin_unlock(&nn->nfs_client_lock);
@@ -314,7 +314,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
sap))
continue;
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
return clp;
}
return NULL;
@@ -1006,7 +1006,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
/* Copy data from the source */
server->nfs_client = source->nfs_client;
server->destroy = source->destroy;
- atomic_inc(&server->nfs_client->cl_count);
+ refcount_inc(&server->nfs_client->cl_count);
nfs_server_copy_userdata(server, source);
server->fsid = fattr->fsid;
@@ -1166,7 +1166,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
- atomic_read(&clp->cl_count),
+ refcount_read(&clp->cl_count),
clp->cl_hostname);
rcu_read_unlock();
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 606dd3871f66..ade44ca0c66c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -1041,6 +1041,33 @@ int nfs_delegations_present(struct nfs_client *clp)
}
/**
+ * nfs4_refresh_delegation_stateid - Update delegation stateid seqid
+ * @dst: stateid to refresh
+ * @inode: inode to check
+ *
+ * Returns "true" and updates "dst->seqid" * if inode had a delegation
+ * that matches our delegation stateid. Otherwise "false" is returned.
+ */
+bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ bool ret = false;
+ if (!inode)
+ goto out;
+
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL &&
+ nfs4_stateid_match_other(dst, &delegation->stateid)) {
+ dst->seqid = delegation->stateid.seqid;
+ ret = true;
+ }
+ rcu_read_unlock();
+out:
+ return ret;
+}
+
+/**
* nfs4_copy_delegation_stateid - Copy inode's state ID information
* @inode: inode to check
* @flags: delegation type requirement
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index ddaf2644cf13..185a09f37a89 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -62,6 +62,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
+bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f439f1c45008..e51ae52ed14f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -118,13 +118,6 @@ nfs_opendir(struct inode *inode, struct file *filp)
goto out;
}
filp->private_data = ctx;
- if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
- /* This is a mountpoint, so d_revalidate will never
- * have been called, so we need to refresh the
- * inode (for close-open consistency) ourselves.
- */
- __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- }
out:
put_rpccred(cred);
return res;
@@ -253,7 +246,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
desc->cache_entry_index = index;
return 0;
out_eof:
- desc->eof = 1;
+ desc->eof = true;
return -EBADCOOKIE;
}
@@ -307,7 +300,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
if (array->eof_index >= 0) {
status = -EBADCOOKIE;
if (*desc->dir_cookie == array->last_cookie)
- desc->eof = 1;
+ desc->eof = true;
}
out:
return status;
@@ -761,7 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
- desc->eof = 1;
+ desc->eof = true;
break;
}
desc->ctx->pos++;
@@ -773,7 +766,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
ctx->duped = 1;
}
if (array->eof_index >= 0)
- desc->eof = 1;
+ desc->eof = true;
kunmap(desc->page);
cache_page_release(desc);
@@ -873,7 +866,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
if (res == -EBADCOOKIE) {
res = 0;
/* This means either end of directory */
- if (*desc->dir_cookie && desc->eof == 0) {
+ if (*desc->dir_cookie && !desc->eof) {
/* Or that the server has 'lost' a cookie */
res = uncached_readdir(desc);
if (res == 0)
@@ -1241,8 +1234,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
- if (nfs_mapping_need_revalidate_inode(inode))
- error = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ error = nfs_lookup_verify_inode(inode, flags);
dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
__func__, inode->i_ino, error ? "invalid" : "valid");
return !error;
@@ -1393,6 +1385,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
+ .d_weak_revalidate = nfs_weak_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
@@ -2064,7 +2057,7 @@ out:
* should mark the directories for revalidation.
*/
d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
+ nfs_set_verifier(old_dentry,
nfs_save_change_attribute(new_dir));
} else if (error == -ENOENT)
nfs_dentry_handle_enoent(old_dentry);
@@ -2369,15 +2362,15 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
}
EXPORT_SYMBOL_GPL(nfs_access_add_cache);
-#define NFS_MAY_READ (NFS4_ACCESS_READ)
-#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \
- NFS4_ACCESS_EXTEND | \
- NFS4_ACCESS_DELETE)
-#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \
- NFS4_ACCESS_EXTEND)
+#define NFS_MAY_READ (NFS_ACCESS_READ)
+#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
+ NFS_ACCESS_EXTEND | \
+ NFS_ACCESS_DELETE)
+#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
+ NFS_ACCESS_EXTEND)
#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
-#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP)
-#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE)
+#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
+#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
static int
nfs_access_calc_mask(u32 access_result, umode_t umode)
{
@@ -2425,9 +2418,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
if (!may_block)
goto out;
- /* Be clever: ask server to check for all possible rights */
- cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE
- | NFS_MAY_WRITE | NFS_MAY_READ;
+ /*
+ * Determine which access bits we want to ask for...
+ */
+ cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+ if (S_ISDIR(inode->i_mode))
+ cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
+ else
+ cache.mask |= NFS_ACCESS_EXECUTE;
cache.cred = cred;
status = NFS_PROTO(inode)->access(inode, &cache);
if (status != 0) {
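
nfs_do_access() now builds the server probe mask itself from protocol-level NFS_ACCESS_* bits: READ, MODIFY and EXTEND always, plus DELETE and LOOKUP for directories or EXECUTE for anything else; the v3 and v4 access procs (later in this diff) then pass entry->mask straight through instead of re-deriving it from MAY_* flags. The selection logic restated as standalone C, with the ACCESS bit values as defined by the NFSv3 protocol:

    /* Mask selection as in the new nfs_do_access(): always probe
     * read/modify/extend, add delete+lookup for directories, execute
     * for everything else.  Bit values follow RFC 1813 ACCESS3_*. */
    #include <stdio.h>
    #include <sys/stat.h>

    #define NFS_ACCESS_READ    0x0001
    #define NFS_ACCESS_LOOKUP  0x0002
    #define NFS_ACCESS_MODIFY  0x0004
    #define NFS_ACCESS_EXTEND  0x0008
    #define NFS_ACCESS_DELETE  0x0010
    #define NFS_ACCESS_EXECUTE 0x0020

    static unsigned int access_probe_mask(mode_t mode)
    {
        unsigned int mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
                            NFS_ACCESS_EXTEND;

        if (S_ISDIR(mode))
            mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
        else
            mask |= NFS_ACCESS_EXECUTE;
        return mask;
    }

    int main(void)
    {
        printf("dir:  0x%02x\n", access_probe_mask(S_IFDIR)); /* 0x1f */
        printf("file: 0x%02x\n", access_probe_mask(S_IFREG)); /* 0x2d */
        return 0;
    }
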
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0214dd1e1060..81cca49a8375 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -829,23 +829,9 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
- /*
- * VFS doesn't require the open mode to match a flock() lock's type.
- * NFS, however, may simulate flock() locking with posix locking which
- * requires the open mode to match the lock type.
- */
- switch (fl->fl_type) {
- case F_UNLCK:
+ /* We're simulating flock() locks using posix locks on the server */
+ if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl, is_local);
- case F_RDLCK:
- if (!(filp->f_mode & FMODE_READ))
- return -EBADF;
- break;
- case F_WRLCK:
- if (!(filp->f_mode & FMODE_WRITE))
- return -EBADF;
- }
-
return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);
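
Note that the open-mode validation deleted from nfs_flock() above is not simply dropped: a matching check is added to nfs4_proc_lock() further down in this diff, with the comment that the VFS does not verify the open mode for flock()-style locks, so the check now sits on the path where the flock is translated into a posix lock on the server.
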
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 508126eb49f9..4e54d8b5413a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -471,10 +471,10 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
return PNFS_NOT_ATTEMPTED;
dprintk("%s USE DS: %s cl_count %d\n", __func__,
- ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
+ ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
/* No multipath support. Use first DS */
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -515,10 +515,10 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
- offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
+ offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
hdr->pgio_done_cb = filelayout_write_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -1064,9 +1064,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
goto out_err;
dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
- data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
+ data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count));
data->commit_done_cb = filelayout_commit_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index b0fa83a60754..c75ad982bcfc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -187,7 +187,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
continue;
if (!ff_mirror_match_fh(mirror, pos))
continue;
- if (atomic_inc_not_zero(&pos->ref)) {
+ if (refcount_inc_not_zero(&pos->ref)) {
spin_unlock(&inode->i_lock);
return pos;
}
@@ -218,7 +218,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
mirror = kzalloc(sizeof(*mirror), gfp_flags);
if (mirror != NULL) {
spin_lock_init(&mirror->lock);
- atomic_set(&mirror->ref, 1);
+ refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors);
}
return mirror;
@@ -242,7 +242,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- if (mirror != NULL && atomic_dec_and_test(&mirror->ref))
+ if (mirror != NULL && refcount_dec_and_test(&mirror->ref))
ff_layout_free_mirror(mirror);
}
@@ -1726,10 +1726,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
vers = nfs4_ff_layout_ds_version(lseg, idx);
dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
- ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers);
+ ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
hdr->pgio_done_cb = ff_layout_read_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
@@ -1785,11 +1785,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
- offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
+ offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
vers);
hdr->pgio_done_cb = ff_layout_write_done_cb;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
hdr->ds_commit_idx = idx;
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
@@ -1863,11 +1863,11 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
vers = nfs4_ff_layout_ds_version(lseg, idx);
dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
- data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count),
+ data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
vers);
data->commit_done_cb = ff_layout_commit_done_cb;
data->cred = ds_cred;
- atomic_inc(&ds->ds_clp->cl_count);
+ refcount_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
@@ -2286,7 +2286,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
continue;
/* mirror refcount put in cleanup_layoutstats */
- if (!atomic_inc_not_zero(&mirror->ref))
+ if (!refcount_inc_not_zero(&mirror->ref))
continue;
dev = &mirror->mirror_ds->id_node;
memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 679cb087ef3f..411798346e48 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -14,6 +14,7 @@
#define FF_FLAGS_NO_IO_THRU_MDS 2
#define FF_FLAGS_NO_READ_IO 4
+#include <linux/refcount.h>
#include "../pnfs.h"
/* XXX: Let's filter out insanely large mirror count for now to avoid oom
@@ -82,7 +83,7 @@ struct nfs4_ff_layout_mirror {
nfs4_stateid stateid;
struct rpc_cred __rcu *ro_cred;
struct rpc_cred __rcu *rw_cred;
- atomic_t ref;
+ refcount_t ref;
spinlock_t lock;
unsigned long flags;
struct nfs4_ff_layoutstat read_stat;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1629056aa2c9..38b93d54c02e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -783,7 +783,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr);
static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
{
- atomic_set(&l_ctx->count, 1);
+ refcount_set(&l_ctx->count, 1);
l_ctx->lockowner = current->files;
INIT_LIST_HEAD(&l_ctx->list);
atomic_set(&l_ctx->io_count, 0);
@@ -797,7 +797,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
do {
if (pos->lockowner != current->files)
continue;
- atomic_inc(&pos->count);
+ refcount_inc(&pos->count);
return pos;
} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
return NULL;
@@ -836,7 +836,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
struct nfs_open_context *ctx = l_ctx->open_context;
struct inode *inode = d_inode(ctx->dentry);
- if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
+ if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
return;
list_del(&l_ctx->list);
spin_unlock(&inode->i_lock);
@@ -913,7 +913,7 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
if (ctx != NULL)
- atomic_inc(&ctx->lock_context.count);
+ refcount_inc(&ctx->lock_context.count);
return ctx;
}
EXPORT_SYMBOL_GPL(get_nfs_open_context);
@@ -924,11 +924,11 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
struct super_block *sb = ctx->dentry->d_sb;
if (!list_empty(&ctx->list)) {
- if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
return;
list_del(&ctx->list);
spin_unlock(&inode->i_lock);
- } else if (!atomic_dec_and_test(&ctx->lock_context.count))
+ } else if (!refcount_dec_and_test(&ctx->lock_context.count))
return;
if (inode != NULL)
NFS_PROTO(inode)->close_context(ctx, is_sync);
@@ -2084,8 +2084,12 @@ static int nfs_net_init(struct net *net)
static void nfs_net_exit(struct net *net)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
nfs_fs_proc_net_exit(net);
nfs_cleanup_cb_ident_idr(net);
+ WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
+ WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
}
static struct pernet_operations nfs_net_ops = {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index bc673fb47fb3..49f848fd1f04 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -188,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
+ .access = entry->mask,
};
struct nfs3_accessres res;
struct rpc_message msg = {
@@ -196,25 +197,9 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_resp = &res,
.rpc_cred = entry->cred,
};
- int mode = entry->mask;
int status = -ENOMEM;
dprintk("NFS call access\n");
-
- if (mode & MAY_READ)
- arg.access |= NFS3_ACCESS_READ;
- if (S_ISDIR(inode->i_mode)) {
- if (mode & MAY_WRITE)
- arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE;
- if (mode & MAY_EXEC)
- arg.access |= NFS3_ACCESS_LOOKUP;
- } else {
- if (mode & MAY_WRITE)
- arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
- if (mode & MAY_EXEC)
- arg.access |= NFS3_ACCESS_EXECUTE;
- }
-
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
goto out;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index dcfcf7fd7438..b374f680830c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -145,7 +145,7 @@ struct nfs4_lock_state {
unsigned long ls_flags;
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
- atomic_t ls_count;
+ refcount_t ls_count;
fl_owner_t ls_owner;
};
@@ -162,6 +162,7 @@ enum {
NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */
+ NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
};
struct nfs4_state {
@@ -185,6 +186,8 @@ struct nfs4_state {
unsigned int n_rdwr; /* Number of read/write references */
fmode_t state; /* State on the server (R,W, or RW) */
atomic_t count;
+
+ wait_queue_head_t waitq;
};
@@ -458,6 +461,10 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
const struct nfs_lock_context *, nfs4_stateid *,
struct rpc_cred **);
+extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
+ struct nfs4_state *state);
+extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
+ struct nfs4_state *state);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -465,7 +472,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_release_seqid(struct nfs_seqid *seqid);
extern void nfs_free_seqid(struct nfs_seqid *seqid);
-extern int nfs4_setup_sequence(const struct nfs_client *client,
+extern int nfs4_setup_sequence(struct nfs_client *client,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
struct rpc_task *task);
@@ -475,6 +482,7 @@ extern int nfs4_sequence_done(struct rpc_task *task,
extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
extern const nfs4_stateid zero_stateid;
+extern const nfs4_stateid invalid_stateid;
/* nfs4super.c */
struct nfs_mount_info;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index e9bea90dc017..12bbab0becb4 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -483,7 +483,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
* ID and serverowner fields. Wait for CREATE_SESSION
* to finish. */
if (pos->cl_cons_state > NFS_CS_READY) {
- atomic_inc(&pos->cl_count);
+ refcount_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(*prev);
@@ -559,7 +559,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
* way that a SETCLIENTID_CONFIRM to pos can succeed is
* if new and pos point to the same server:
*/
- atomic_inc(&pos->cl_count);
+ refcount_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
nfs_put_client(prev);
@@ -715,7 +715,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
continue;
found:
- atomic_inc(&pos->cl_count);
+ refcount_inc(&pos->cl_count);
*result = pos;
status = 0;
break;
@@ -749,7 +749,7 @@ nfs4_find_client_ident(struct net *net, int cb_ident)
spin_lock(&nn->nfs_client_lock);
clp = idr_find(&nn->cb_ident_idr, cb_ident);
if (clp)
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
return clp;
}
@@ -793,7 +793,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
- if (nfs4_cb_match_client(addr, clp, minorversion) == false)
+ if (!nfs4_cb_match_client(addr, clp, minorversion))
continue;
if (!nfs4_has_session(clp))
@@ -804,7 +804,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
continue;
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
return clp;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f90090e8c959..56fa5a16e097 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_open_context *ctx, struct nfs4_label *ilabel,
struct nfs4_label *olabel);
#ifdef CONFIG_NFS_V4_1
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+ struct rpc_cred *cred,
+ struct nfs4_slot *slot,
+ bool is_privileged);
static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
struct rpc_cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
@@ -254,15 +258,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
};
const u32 nfs4_fs_locations_bitmap[3] = {
- FATTR4_WORD0_TYPE
- | FATTR4_WORD0_CHANGE
+ FATTR4_WORD0_CHANGE
| FATTR4_WORD0_SIZE
| FATTR4_WORD0_FSID
| FATTR4_WORD0_FILEID
| FATTR4_WORD0_FS_LOCATIONS,
- FATTR4_WORD1_MODE
- | FATTR4_WORD1_NUMLINKS
- | FATTR4_WORD1_OWNER
+ FATTR4_WORD1_OWNER
| FATTR4_WORD1_OWNER_GROUP
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
@@ -644,13 +645,14 @@ static int nfs40_sequence_done(struct rpc_task *task,
#if defined(CONFIG_NFS_V4_1)
-static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+static void nfs41_release_slot(struct nfs4_slot *slot)
{
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
- struct nfs4_slot *slot = res->sr_slot;
bool send_new_highest_used_slotid = false;
+ if (!slot)
+ return;
tbl = slot->table;
session = tbl->session;
@@ -676,13 +678,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
send_new_highest_used_slotid = false;
out_unlock:
spin_unlock(&tbl->slot_tbl_lock);
- res->sr_slot = NULL;
if (send_new_highest_used_slotid)
nfs41_notify_server(session->clp);
if (waitqueue_active(&tbl->slot_waitq))
wake_up_all(&tbl->slot_waitq);
}
+static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+ nfs41_release_slot(res->sr_slot);
+ res->sr_slot = NULL;
+}
+
static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -710,13 +717,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
/* Check the SEQUENCE operation status */
switch (res->sr_status) {
case 0:
- /* If previous op on slot was interrupted and we reused
- * the seq# and got a reply from the cache, then retry
- */
- if (task->tk_status == -EREMOTEIO && interrupted) {
- ++slot->seq_nr;
- goto retry_nowait;
- }
/* Update the slot's sequence and clientid lease timer */
slot->seq_done = 1;
clp = session->clp;
@@ -750,16 +750,16 @@ static int nfs41_sequence_process(struct rpc_task *task,
* The slot id we used was probably retired. Try again
* using a different slot id.
*/
+ if (slot->seq_nr < slot->table->target_highest_slotid)
+ goto session_recover;
goto retry_nowait;
case -NFS4ERR_SEQ_MISORDERED:
/*
* Was the last operation on this sequence interrupted?
* If so, retry after bumping the sequence number.
*/
- if (interrupted) {
- ++slot->seq_nr;
- goto retry_nowait;
- }
+ if (interrupted)
+ goto retry_new_seq;
/*
* Could this slot have been previously retired?
* If so, then the server may be expecting seq_nr = 1!
@@ -768,10 +768,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
slot->seq_nr = 1;
goto retry_nowait;
}
- break;
+ goto session_recover;
case -NFS4ERR_SEQ_FALSE_RETRY:
- ++slot->seq_nr;
- goto retry_nowait;
+ if (interrupted)
+ goto retry_new_seq;
+ goto session_recover;
default:
/* Just update the slot sequence no. */
slot->seq_done = 1;
@@ -781,6 +782,11 @@ out:
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
out_noaction:
return ret;
+session_recover:
+ nfs4_schedule_session_recovery(session, res->sr_status);
+ goto retry_nowait;
+retry_new_seq:
+ ++slot->seq_nr;
retry_nowait:
if (rpc_restart_call_prepare(task)) {
nfs41_sequence_free_slot(res);
@@ -857,6 +863,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
.rpc_call_done = nfs41_call_sync_done,
};
+static void
+nfs4_sequence_process_interrupted(struct nfs_client *client,
+ struct nfs4_slot *slot, struct rpc_cred *cred)
+{
+ struct rpc_task *task;
+
+ task = _nfs41_proc_sequence(client, cred, slot, true);
+ if (!IS_ERR(task))
+ rpc_put_task_async(task);
+}
+
#else /* !CONFIG_NFS_V4_1 */
static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@@ -877,9 +894,34 @@ int nfs4_sequence_done(struct rpc_task *task,
}
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
+static void
+nfs4_sequence_process_interrupted(struct nfs_client *client,
+ struct nfs4_slot *slot, struct rpc_cred *cred)
+{
+ WARN_ON_ONCE(1);
+ slot->interrupted = 0;
+}
+
#endif /* !CONFIG_NFS_V4_1 */
-int nfs4_setup_sequence(const struct nfs_client *client,
+static
+void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ struct nfs4_slot *slot)
+{
+ if (!slot)
+ return;
+ slot->privileged = args->sa_privileged ? 1 : 0;
+ args->sa_slot = slot;
+
+ res->sr_slot = slot;
+ res->sr_timestamp = jiffies;
+ res->sr_status_flags = 0;
+ res->sr_status = 1;
+
+}
+
+int nfs4_setup_sequence(struct nfs_client *client,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
struct rpc_task *task)
@@ -897,29 +939,28 @@ int nfs4_setup_sequence(const struct nfs_client *client,
task->tk_timeout = 0;
}
- spin_lock(&tbl->slot_tbl_lock);
- /* The state manager will wait until the slot table is empty */
- if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
- goto out_sleep;
+ for (;;) {
+ spin_lock(&tbl->slot_tbl_lock);
+ /* The state manager will wait until the slot table is empty */
+ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+ goto out_sleep;
+
+ slot = nfs4_alloc_slot(tbl);
+ if (IS_ERR(slot)) {
+ /* Try again in 1/4 second */
+ if (slot == ERR_PTR(-ENOMEM))
+ task->tk_timeout = HZ >> 2;
+ goto out_sleep;
+ }
+ spin_unlock(&tbl->slot_tbl_lock);
- slot = nfs4_alloc_slot(tbl);
- if (IS_ERR(slot)) {
- /* Try again in 1/4 second */
- if (slot == ERR_PTR(-ENOMEM))
- task->tk_timeout = HZ >> 2;
- goto out_sleep;
+ if (likely(!slot->interrupted))
+ break;
+ nfs4_sequence_process_interrupted(client,
+ slot, task->tk_msg.rpc_cred);
}
- spin_unlock(&tbl->slot_tbl_lock);
-
- slot->privileged = args->sa_privileged ? 1 : 0;
- args->sa_slot = slot;
- res->sr_slot = slot;
- if (session) {
- res->sr_timestamp = jiffies;
- res->sr_status_flags = 0;
- res->sr_status = 1;
- }
+ nfs4_sequence_attach_slot(args, res, slot);
trace_nfs4_setup_sequence(session, args);
out_start:
@@ -1044,6 +1085,12 @@ struct nfs4_opendata {
int rpc_status;
};
+struct nfs4_open_createattrs {
+ struct nfs4_label *label;
+ struct iattr *sattr;
+ const __u32 verf[2];
+};
+
static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
int err, struct nfs4_exception *exception)
{
@@ -1113,8 +1160,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct nfs4_state_owner *sp, fmode_t fmode, int flags,
- const struct iattr *attrs,
- struct nfs4_label *label,
+ const struct nfs4_open_createattrs *c,
enum open_claim_type4 claim,
gfp_t gfp_mask)
{
@@ -1122,6 +1168,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct inode *dir = d_inode(parent);
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
+ struct nfs4_label *label = (c != NULL) ? c->label : NULL;
struct nfs4_opendata *p;
p = kzalloc(sizeof(*p), gfp_mask);
@@ -1187,15 +1234,11 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
p->o_arg.fh = NFS_FH(d_inode(dentry));
}
- if (attrs != NULL && attrs->ia_valid != 0) {
- __u32 verf[2];
-
+ if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) {
p->o_arg.u.attrs = &p->attrs;
- memcpy(&p->attrs, attrs, sizeof(p->attrs));
+ memcpy(&p->attrs, c->sattr, sizeof(p->attrs));
- verf[0] = jiffies;
- verf[1] = current->pid;
- memcpy(p->o_arg.u.verifier.data, verf,
+ memcpy(p->o_arg.u.verifier.data, c->verf,
sizeof(p->o_arg.u.verifier.data));
}
p->c_arg.fh = &p->o_res.fh;
@@ -1334,6 +1377,25 @@ static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state)
}
#endif /* CONFIG_NFS_V4_1 */
+static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
+{
+ if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
+ wake_up_all(&state->waitq);
+}
+
+static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *stateid)
+{
+ u32 state_seqid = be32_to_cpu(state->open_stateid.seqid);
+ u32 stateid_seqid = be32_to_cpu(stateid->seqid);
+
+ if (stateid_seqid == state_seqid + 1U ||
+ (stateid_seqid == 1U && state_seqid == 0xffffffffU))
+ nfs_state_log_update_open_stateid(state);
+ else
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
+}
+
static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{
struct nfs_client *clp = state->owner->so_server->nfs_client;
@@ -1349,18 +1411,32 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
nfs4_state_mark_reclaim_nograce(clp, state);
}
+/*
+ * Check for whether or not the caller may update the open stateid
+ * to the value passed in by stateid.
+ *
+ * Note: This function relies heavily on the server implementing
+ * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2
+ * correctly.
+ * i.e. The stateid seqids have to be initialised to 1, and
+ * are then incremented on every state transition.
+ */
static bool nfs_need_update_open_stateid(struct nfs4_state *state,
- const nfs4_stateid *stateid, nfs4_stateid *freeme)
+ const nfs4_stateid *stateid)
{
- if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
- return true;
- if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
- nfs4_stateid_copy(freeme, &state->open_stateid);
- nfs_test_and_clear_all_open_stateid(state);
+ if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 ||
+ !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ if (stateid->seqid == cpu_to_be32(1))
+ nfs_state_log_update_open_stateid(state);
+ else
+ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
return true;
}
- if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
+
+ if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
+ nfs_state_log_out_of_order_open_stateid(state, stateid);
return true;
+ }
return false;
}
@@ -1399,11 +1475,14 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
!nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
nfs_resync_open_stateid_locked(state);
- return;
+ goto out;
}
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
+ trace_nfs4_open_stateid_update(state->inode, stateid, 0);
+out:
+ nfs_state_log_update_open_stateid(state);
}
static void nfs_clear_open_stateid(struct nfs4_state *state,
@@ -1420,29 +1499,60 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
}
static void nfs_set_open_stateid_locked(struct nfs4_state *state,
- const nfs4_stateid *stateid, fmode_t fmode,
- nfs4_stateid *freeme)
+ const nfs4_stateid *stateid, nfs4_stateid *freeme)
{
- switch (fmode) {
- case FMODE_READ:
- set_bit(NFS_O_RDONLY_STATE, &state->flags);
+ DEFINE_WAIT(wait);
+ int status = 0;
+ for (;;) {
+
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
+ if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
break;
- case FMODE_WRITE:
- set_bit(NFS_O_WRONLY_STATE, &state->flags);
+ if (status)
break;
- case FMODE_READ|FMODE_WRITE:
- set_bit(NFS_O_RDWR_STATE, &state->flags);
+ /* Rely on seqids for serialisation with NFSv4.0 */
+ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
+ break;
+
+ prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
+ /*
+ * Ensure we process the state changes in the same order
+ * in which the server processed them by delaying the
+ * update of the stateid until we are in sequence.
+ */
+ write_sequnlock(&state->seqlock);
+ spin_unlock(&state->owner->so_lock);
+ rcu_read_unlock();
+ trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
+ if (!signal_pending(current)) {
+ if (schedule_timeout(5*HZ) == 0)
+ status = -EAGAIN;
+ else
+ status = 0;
+ } else
+ status = -EINTR;
+ finish_wait(&state->waitq, &wait);
+ rcu_read_lock();
+ spin_lock(&state->owner->so_lock);
+ write_seqlock(&state->seqlock);
}
- if (!nfs_need_update_open_stateid(state, stateid, freeme))
- return;
+
+ if (test_bit(NFS_OPEN_STATE, &state->flags) &&
+ !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ nfs4_stateid_copy(freeme, &state->open_stateid);
+ nfs_test_and_clear_all_open_stateid(state);
+ }
+
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
+ trace_nfs4_open_stateid_update(state->inode, stateid, status);
+ nfs_state_log_update_open_stateid(state);
}
-static void __update_open_stateid(struct nfs4_state *state,
+static void nfs_state_set_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
- const nfs4_stateid *deleg_stateid,
fmode_t fmode,
nfs4_stateid *freeme)
{
@@ -1450,17 +1560,34 @@ static void __update_open_stateid(struct nfs4_state *state,
* Protect the call to nfs4_state_set_mode_locked and
* serialise the stateid update
*/
- spin_lock(&state->owner->so_lock);
write_seqlock(&state->seqlock);
- if (deleg_stateid != NULL) {
- nfs4_stateid_copy(&state->stateid, deleg_stateid);
- set_bit(NFS_DELEGATED_STATE, &state->flags);
+ nfs_set_open_stateid_locked(state, open_stateid, freeme);
+ switch (fmode) {
+ case FMODE_READ:
+ set_bit(NFS_O_RDONLY_STATE, &state->flags);
+ break;
+ case FMODE_WRITE:
+ set_bit(NFS_O_WRONLY_STATE, &state->flags);
+ break;
+ case FMODE_READ|FMODE_WRITE:
+ set_bit(NFS_O_RDWR_STATE, &state->flags);
}
- if (open_stateid != NULL)
- nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme);
+ set_bit(NFS_OPEN_STATE, &state->flags);
+ write_sequnlock(&state->seqlock);
+}
+
+static void nfs_state_set_delegation(struct nfs4_state *state,
+ const nfs4_stateid *deleg_stateid,
+ fmode_t fmode)
+{
+ /*
+ * Protect the call to nfs4_state_set_mode_locked and
+ * serialise the stateid update
+ */
+ write_seqlock(&state->seqlock);
+ nfs4_stateid_copy(&state->stateid, deleg_stateid);
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
write_sequnlock(&state->seqlock);
- update_open_stateflags(state, fmode);
- spin_unlock(&state->owner->so_lock);
}
static int update_open_stateid(struct nfs4_state *state,
@@ -1478,6 +1605,12 @@ static int update_open_stateid(struct nfs4_state *state,
fmode &= (FMODE_READ|FMODE_WRITE);
rcu_read_lock();
+ spin_lock(&state->owner->so_lock);
+ if (open_stateid != NULL) {
+ nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme);
+ ret = 1;
+ }
+
deleg_cur = rcu_dereference(nfsi->delegation);
if (deleg_cur == NULL)
goto no_delegation;
@@ -1494,18 +1627,16 @@ static int update_open_stateid(struct nfs4_state *state,
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
- __update_open_stateid(state, open_stateid, &deleg_cur->stateid,
- fmode, &freeme);
+ nfs_state_set_delegation(state, &deleg_cur->stateid, fmode);
ret = 1;
no_delegation_unlock:
spin_unlock(&deleg_cur->lock);
no_delegation:
+ if (ret)
+ update_open_stateflags(state, fmode);
+ spin_unlock(&state->owner->so_lock);
rcu_read_unlock();
- if (!ret && open_stateid != NULL) {
- __update_open_stateid(state, open_stateid, NULL, fmode, &freeme);
- ret = 1;
- }
if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
nfs4_schedule_state_manager(clp);
if (freeme.type != 0)
@@ -1761,7 +1892,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
struct nfs4_opendata *opendata;
opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0,
- NULL, NULL, claim, GFP_NOFS);
+ NULL, claim, GFP_NOFS);
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
@@ -2518,7 +2649,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
spin_unlock(&state->state_lock);
nfs4_put_lock_state(prev);
@@ -2692,8 +2823,7 @@ out:
static int _nfs4_do_open(struct inode *dir,
struct nfs_open_context *ctx,
int flags,
- struct iattr *sattr,
- struct nfs4_label *label,
+ const struct nfs4_open_createattrs *c,
int *opened)
{
struct nfs4_state_owner *sp;
@@ -2705,6 +2835,8 @@ static int _nfs4_do_open(struct inode *dir,
struct nfs4_threshold **ctx_th = &ctx->mdsthreshold;
fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
+ struct iattr *sattr = c->sattr;
+ struct nfs4_label *label = c->label;
struct nfs4_label *olabel = NULL;
int status;
@@ -2723,8 +2855,8 @@ static int _nfs4_do_open(struct inode *dir,
status = -ENOMEM;
if (d_really_is_positive(dentry))
claim = NFS4_OPEN_CLAIM_FH;
- opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
- label, claim, GFP_KERNEL);
+ opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags,
+ c, claim, GFP_KERNEL);
if (opendata == NULL)
goto err_put_state_owner;
@@ -2805,10 +2937,18 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
struct nfs4_state *res;
+ struct nfs4_open_createattrs c = {
+ .label = label,
+ .sattr = sattr,
+ .verf = {
+ [0] = (__u32)jiffies,
+ [1] = (__u32)current->pid,
+ },
+ };
int status;
do {
- status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
+ status = _nfs4_do_open(dir, ctx, flags, &c, opened);
res = ctx->state;
trace_nfs4_open_file(ctx, flags, status);
if (status == 0)
@@ -3024,18 +3164,20 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
calldata->arg.lr_args = NULL;
calldata->res.lr_res = NULL;
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid,
+ calldata->inode))
+ goto lr_restart;
+ /* Fallthrough */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
case -NFS4ERR_WRONG_CRED:
calldata->arg.lr_args = NULL;
calldata->res.lr_res = NULL;
- calldata->res.lr_ret = 0;
- rpc_restart_call_prepare(task);
- return;
+ goto lr_restart;
}
}
@@ -3051,39 +3193,43 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.bitmask != NULL) {
calldata->arg.bitmask = NULL;
calldata->res.fattr = NULL;
- task->tk_status = 0;
- rpc_restart_call_prepare(task);
- goto out_release;
+ goto out_restart;
}
break;
+ case -NFS4ERR_OLD_STATEID:
+ /* Did we race with OPEN? */
+ if (nfs4_refresh_open_stateid(&calldata->arg.stateid,
+ state))
+ goto out_restart;
+ goto out_release;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_free_revoked_stateid(server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
- case -NFS4ERR_OLD_STATEID:
+ /* Fallthrough */
case -NFS4ERR_BAD_STATEID:
- if (!nfs4_stateid_match(&calldata->arg.stateid,
- &state->open_stateid)) {
- rpc_restart_call_prepare(task);
- goto out_release;
- }
- if (calldata->arg.fmode == 0)
- break;
+ break;
default:
- if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) {
- rpc_restart_call_prepare(task);
- goto out_release;
- }
+ if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ goto out_restart;
}
nfs_clear_open_stateid(state, &calldata->arg.stateid,
res_stateid, calldata->arg.fmode);
out_release:
+ task->tk_status = 0;
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, &calldata->fattr);
dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
+ return;
+lr_restart:
+ calldata->res.lr_ret = 0;
+out_restart:
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out_release;
}
static void nfs4_close_prepare(struct rpc_task *task, void *data)
@@ -3103,7 +3249,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
- nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
/* Calculate the change in open mode */
calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
@@ -3121,7 +3266,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
if (!nfs4_valid_open_stateid(state) ||
- test_bit(NFS_OPEN_STATE, &state->flags) == 0)
+ !nfs4_refresh_open_stateid(&calldata->arg.stateid, state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -3215,6 +3360,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->inode = state->inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
+ if (!nfs4_copy_open_stateid(&calldata->arg.stateid, state))
+ goto out_free_calldata;
/* Serialization for the sequence id */
alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask);
@@ -3889,6 +4036,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
struct nfs4_accessargs args = {
.fh = NFS_FH(inode),
.bitmask = server->cache_consistency_bitmask,
+ .access = entry->mask,
};
struct nfs4_accessres res = {
.server = server,
@@ -3899,26 +4047,8 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
.rpc_resp = &res,
.rpc_cred = entry->cred,
};
- int mode = entry->mask;
int status = 0;
- /*
- * Determine which access bits we want to ask for...
- */
- if (mode & MAY_READ)
- args.access |= NFS4_ACCESS_READ;
- if (S_ISDIR(inode->i_mode)) {
- if (mode & MAY_WRITE)
- args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE;
- if (mode & MAY_EXEC)
- args.access |= NFS4_ACCESS_LOOKUP;
- } else {
- if (mode & MAY_WRITE)
- args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
- if (mode & MAY_EXEC)
- args.access |= NFS4_ACCESS_EXECUTE;
- }
-
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return -ENOMEM;
@@ -4843,7 +4973,7 @@ static void nfs4_renew_release(void *calldata)
struct nfs4_renewdata *data = calldata;
struct nfs_client *clp = data->client;
- if (atomic_read(&clp->cl_count) > 1)
+ if (refcount_read(&clp->cl_count) > 1)
nfs4_schedule_state_renewal(clp);
nfs_put_client(clp);
kfree(data);
@@ -4891,7 +5021,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
if (renew_flags == 0)
return 0;
- if (!atomic_inc_not_zero(&clp->cl_count))
+ if (!refcount_inc_not_zero(&clp->cl_count))
return -EIO;
data = kmalloc(sizeof(*data), GFP_NOFS);
if (data == NULL) {
@@ -5643,18 +5773,20 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
data->args.lr_args = NULL;
data->res.lr_res = NULL;
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid,
+ data->inode))
+ goto lr_restart;
+ /* Fallthrough */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
case -NFS4ERR_WRONG_CRED:
data->args.lr_args = NULL;
data->res.lr_res = NULL;
- data->res.lr_ret = 0;
- rpc_restart_call_prepare(task);
- return;
+ goto lr_restart;
}
}
@@ -5668,27 +5800,36 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
nfs4_free_revoked_stateid(data->res.server,
data->args.stateid,
task->tk_msg.rpc_cred);
+ /* Fallthrough */
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
task->tk_status = 0;
break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode))
+ goto out_restart;
+ task->tk_status = 0;
+ break;
case -NFS4ERR_ACCESS:
if (data->args.bitmask) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
- task->tk_status = 0;
- rpc_restart_call_prepare(task);
- return;
+ goto out_restart;
}
+ /* Fallthrough */
default:
if (nfs4_async_handle_error(task, data->res.server,
NULL, NULL) == -EAGAIN) {
- rpc_restart_call_prepare(task);
- return;
+ goto out_restart;
}
}
data->rpc_status = task->tk_status;
+ return;
+lr_restart:
+ data->res.lr_ret = 0;
+out_restart:
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
}
static void nfs4_delegreturn_release(void *calldata)
@@ -5896,7 +6037,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->arg.seqid = seqid;
p->res.seqid = seqid;
p->lsp = lsp;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
p->l_ctx = nfs_get_lock_context(ctx);
@@ -6112,7 +6253,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
p->server = server;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
@@ -6568,6 +6709,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
return -ENOLCK;
+ /*
+ * Don't rely on the VFS having checked the file open mode,
+ * since it won't do this for flock() locks.
+ */
+ switch (request->fl_type) {
+ case F_RDLCK:
+ if (!(filp->f_mode & FMODE_READ))
+ return -EBADF;
+ break;
+ case F_WRLCK:
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+ }
+
status = nfs4_set_lock_state(state, request);
if (status != 0)
return status;
@@ -6763,9 +6918,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
- u32 bitmask[3] = {
- [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
- };
+ u32 bitmask[3];
struct nfs4_fs_locations_arg args = {
.dir_fh = NFS_FH(dir),
.name = name,
@@ -6784,12 +6937,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
dprintk("%s: start\n", __func__);
+ bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS;
+ bitmask[1] = nfs4_fattr_bitmap[1];
+
/* Ask for the fileid of the absent filesystem if mounted_on_fileid
* is not supported */
if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
- bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+ bitmask[0] &= ~FATTR4_WORD0_FILEID;
else
- bitmask[0] |= FATTR4_WORD0_FILEID;
+ bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
@@ -7472,7 +7628,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
struct nfs41_exchange_id_data *calldata;
int status;
- if (!atomic_inc_not_zero(&clp->cl_count))
+ if (!refcount_inc_not_zero(&clp->cl_count))
return ERR_PTR(-EIO);
status = -ENOMEM;
@@ -8072,7 +8228,7 @@ static void nfs41_sequence_release(void *data)
struct nfs4_sequence_data *calldata = data;
struct nfs_client *clp = calldata->clp;
- if (atomic_read(&clp->cl_count) > 1)
+ if (refcount_read(&clp->cl_count) > 1)
nfs4_schedule_state_renewal(clp);
nfs_put_client(clp);
kfree(calldata);
@@ -8101,7 +8257,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
trace_nfs4_sequence(clp, task->tk_status);
if (task->tk_status < 0) {
dprintk("%s ERROR %d\n", __func__, task->tk_status);
- if (atomic_read(&clp->cl_count) == 1)
+ if (refcount_read(&clp->cl_count) == 1)
goto out;
if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
@@ -8135,6 +8291,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
struct rpc_cred *cred,
+ struct nfs4_slot *slot,
bool is_privileged)
{
struct nfs4_sequence_data *calldata;
@@ -8148,15 +8305,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
.callback_ops = &nfs41_sequence_ops,
.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
};
+ struct rpc_task *ret;
- if (!atomic_inc_not_zero(&clp->cl_count))
- return ERR_PTR(-EIO);
+ ret = ERR_PTR(-EIO);
+ if (!refcount_inc_not_zero(&clp->cl_count))
+ goto out_err;
+
+ ret = ERR_PTR(-ENOMEM);
calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
- if (calldata == NULL) {
- nfs_put_client(clp);
- return ERR_PTR(-ENOMEM);
- }
+ if (calldata == NULL)
+ goto out_put_clp;
nfs4_init_sequence(&calldata->args, &calldata->res, 0);
+ nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
if (is_privileged)
nfs4_set_sequence_privileged(&calldata->args);
msg.rpc_argp = &calldata->args;
@@ -8164,7 +8324,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
calldata->clp = clp;
task_setup_data.callback_data = calldata;
- return rpc_run_task(&task_setup_data);
+ ret = rpc_run_task(&task_setup_data);
+ if (IS_ERR(ret))
+ goto out_err;
+ return ret;
+out_put_clp:
+ nfs_put_client(clp);
+out_err:
+ nfs41_release_slot(slot);
+ return ret;
}
static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
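The _nfs41_proc_sequence() rework above replaces inline cleanup with the goto-unwind error style: take resources in order, release them in reverse on failure. A minimal sketch of that shape under the same client refcounting, where do_thing() is illustrative:

static int do_thing(struct nfs_client *clp)
{
	void *buf;
	int err = -EIO;

	if (!refcount_inc_not_zero(&clp->cl_count))
		goto out_err;

	err = -ENOMEM;
	buf = kzalloc(64, GFP_NOFS);
	if (!buf)
		goto out_put_clp;

	/* ... use buf ... */
	kfree(buf);
	return 0;

out_put_clp:
	nfs_put_client(clp);
out_err:
	return err;
}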
@@ -8174,7 +8342,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
return -EAGAIN;
- task = _nfs41_proc_sequence(clp, cred, false);
+ task = _nfs41_proc_sequence(clp, cred, NULL, false);
if (IS_ERR(task))
ret = PTR_ERR(task);
else
@@ -8188,7 +8356,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
struct rpc_task *task;
int ret;
- task = _nfs41_proc_sequence(clp, cred, true);
+ task = _nfs41_proc_sequence(clp, cred, NULL, true);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto out;
@@ -8588,18 +8756,27 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
server = NFS_SERVER(lrp->args.inode);
switch (task->tk_status) {
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_layout_stateid(&lrp->args.stateid,
+ lrp->args.inode))
+ goto out_restart;
+ /* Fallthrough */
default:
task->tk_status = 0;
+ /* Fallthrough */
case 0:
break;
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
- nfs4_sequence_free_slot(&lrp->res.seq_res);
- rpc_restart_call_prepare(task);
- return;
+ goto out_restart;
}
dprintk("<-- %s\n", __func__);
+ return;
+out_restart:
+ task->tk_status = 0;
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
+ rpc_restart_call_prepare(task);
}
static void nfs4_layoutreturn_release(void *calldata)
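The nfs4_layoutreturn_done() hunk above retries the RPC after refreshing a stale layout stateid instead of giving up on -NFS4ERR_OLD_STATEID. A condensed sketch of that retry shape, using the helpers this patch introduces (done_cb itself is illustrative):

static void done_cb(struct rpc_task *task, nfs4_stateid *stateid,
		    struct inode *inode, struct nfs4_sequence_res *seq_res)
{
	if (task->tk_status == -NFS4ERR_OLD_STATEID &&
	    nfs4_refresh_layout_stateid(stateid, inode)) {
		task->tk_status = 0;
		nfs4_sequence_free_slot(seq_res);
		rpc_restart_call_prepare(task);
	}
}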
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0378e2257ca7..54fd56d715a8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -69,6 +69,14 @@ const nfs4_stateid zero_stateid = {
{ .data = { 0 } },
.type = NFS4_SPECIAL_STATEID_TYPE,
};
+const nfs4_stateid invalid_stateid = {
+ {
+ .seqid = cpu_to_be32(0xffffffffU),
+ .other = { 0 },
+ },
+ .type = NFS4_INVALID_STATEID_TYPE,
+};
+
static DEFINE_MUTEX(nfs_clid_init_mutex);
int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
@@ -645,6 +653,7 @@ nfs4_alloc_open_state(void)
INIT_LIST_HEAD(&state->lock_states);
spin_lock_init(&state->state_lock);
seqlock_init(&state->seqlock);
+ init_waitqueue_head(&state->waitq);
return state;
}
@@ -825,7 +834,7 @@ __nfs4_find_lock_state(struct nfs4_state *state,
ret = pos;
}
if (ret)
- atomic_inc(&ret->ls_count);
+ refcount_inc(&ret->ls_count);
return ret;
}
@@ -843,7 +852,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
if (lsp == NULL)
return NULL;
nfs4_init_seqid_counter(&lsp->ls_seqid);
- atomic_set(&lsp->ls_count, 1);
+ refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
lsp->ls_owner = fl_owner;
lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
@@ -907,7 +916,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
if (lsp == NULL)
return;
state = lsp->ls_state;
- if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
+ if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock))
return;
list_del(&lsp->ls_locks);
if (list_empty(&state->lock_states))
@@ -927,7 +936,7 @@ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
dst->fl_u.nfs4_fl.owner = lsp;
- atomic_inc(&lsp->ls_count);
+ refcount_inc(&lsp->ls_count);
}
static void nfs4_fl_release_lock(struct file_lock *fl)
@@ -985,18 +994,39 @@ out:
return ret;
}
-static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+{
+ bool ret;
+ int seq;
+
+ do {
+ ret = false;
+ seq = read_seqbegin(&state->seqlock);
+ if (nfs4_state_match_open_stateid_other(state, dst)) {
+ dst->seqid = state->open_stateid.seqid;
+ ret = true;
+ }
+ } while (read_seqretry(&state->seqlock, seq));
+ return ret;
+}
+
+bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
{
+ bool ret;
const nfs4_stateid *src;
int seq;
do {
+ ret = false;
src = &zero_stateid;
seq = read_seqbegin(&state->seqlock);
- if (test_bit(NFS_OPEN_STATE, &state->flags))
+ if (test_bit(NFS_OPEN_STATE, &state->flags)) {
src = &state->open_stateid;
+ ret = true;
+ }
nfs4_stateid_copy(dst, src);
} while (read_seqretry(&state->seqlock, seq));
+ return ret;
}
/*
@@ -1177,7 +1207,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;
__module_get(THIS_MODULE);
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
/* The rcu_read_lock() is not strictly necessary, as the state
* manager is the only thread that ever changes the rpc_xprt
@@ -1269,7 +1299,7 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
might_sleep();
- atomic_inc(&clp->cl_count);
+ refcount_inc(&clp->cl_count);
res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
@@ -1409,6 +1439,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
found = true;
continue;
}
+ if (nfs4_stateid_match_other(&state->open_stateid, stateid) &&
+ nfs4_state_mark_reclaim_nograce(clp, state)) {
+ found = true;
+ continue;
+ }
if (nfs_state_lock_state_matches_stateid(state, stateid) &&
nfs4_state_mark_reclaim_nograce(clp, state))
found = true;
@@ -2510,7 +2545,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
break;
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
break;
- } while (atomic_read(&clp->cl_count) > 1);
+ } while (refcount_read(&clp->cl_count) > 1);
return;
out_error:
if (strlen(section))
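nfs4_refresh_open_stateid() and nfs4_copy_open_stateid() above both read state->seqlock with the standard lockless retry loop. A self-contained sketch of that read-side pattern on a hypothetical structure:

#include <linux/seqlock.h>
#include <linux/types.h>

struct snapshot {
	seqlock_t lock;		/* writers use write_seqlock()/write_sequnlock() */
	u64 a, b;
};

static void snapshot_read(struct snapshot *s, u64 *a, u64 *b)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&s->lock);
		*a = s->a;
		*b = s->b;
	} while (read_seqretry(&s->lock, seq));	/* retry if a writer raced us */
}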
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index e7c6275519b0..a275fba93170 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -202,17 +202,13 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_ARGS(clp, error),
TP_STRUCT__entry(
- __string(dstaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR))
+ __string(dstaddr, clp->cl_hostname)
__field(int, error)
),
TP_fast_assign(
__entry->error = error;
- __assign_str(dstaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR));
+ __assign_str(dstaddr, clp->cl_hostname);
),
TP_printk(
@@ -1066,6 +1062,8 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr);
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update);
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait);
DECLARE_EVENT_CLASS(nfs4_getattr_event,
TP_PROTO(
@@ -1133,9 +1131,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
- __string(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __string(dstaddr, clp ? clp->cl_hostname : "unknown")
),
TP_fast_assign(
@@ -1148,9 +1144,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown")
),
TP_printk(
@@ -1192,9 +1186,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
- __string(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __string(dstaddr, clp ? clp->cl_hostname : "unknown")
__field(int, stateid_seq)
__field(u32, stateid_hash)
),
@@ -1209,9 +1201,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ?
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown")
+ __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown")
__entry->stateid_seq =
be32_to_cpu(stateid->seqid);
__entry->stateid_hash =
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 14ed9791ec9c..77c6729e57f0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4385,6 +4385,14 @@ static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *state
return decode_stateid(xdr, stateid);
}
+static int decode_invalid_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+ nfs4_stateid dummy;
+
+ nfs4_stateid_copy(stateid, &invalid_stateid);
+ return decode_stateid(xdr, &dummy);
+}
+
static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
{
int status;
@@ -4393,7 +4401,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
if (!status)
- status = decode_open_stateid(xdr, &res->stateid);
+ status = decode_invalid_stateid(xdr, &res->stateid);
return status;
}
@@ -6108,6 +6116,8 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
res->lrs_present = be32_to_cpup(p);
if (res->lrs_present)
status = decode_layout_stateid(xdr, &res->stateid);
+ else
+ nfs4_stateid_copy(&res->stateid, &invalid_stateid);
return status;
out_overflow:
print_overflow_msg(__func__, xdr);
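decode_invalid_stateid() above still consumes the stateid bytes from the XDR stream but hands the caller the invalid_stateid sentinel, since the value returned by CLOSE must never be used again. A simplified sketch of the idea, with the types and decode_wire() being hypothetical stand-ins:

#include <linux/types.h>

struct stateid { u32 seqid; u8 other[12]; };

int decode_wire(struct stateid *out);	/* hypothetical XDR consumer */

static const struct stateid invalid = { .seqid = 0xffffffffU };

static int decode_and_poison(struct stateid *out)
{
	struct stateid dummy;

	*out = invalid;			/* caller only ever sees the sentinel */
	return decode_wire(&dummy);	/* still advance the stream */
}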
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3bcd669a3152..d602fe9e1ac8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
- atomic_inc(&lo->plh_refcount);
+ refcount_inc(&lo->plh_refcount);
}
static struct pnfs_layout_hdr *
@@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
pnfs_layoutreturn_before_put_layout_hdr(lo);
- if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+ if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo);
@@ -355,6 +355,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
}
/*
+ * Update the seqid of a layout stateid
+ */
+bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct pnfs_layout_hdr *lo;
+ bool ret = false;
+
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+ dst->seqid = lo->plh_stateid.seqid;
+ ret = true;
+ }
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+/*
* Mark a pnfs_layout_hdr and all associated layout segments as invalid
*
* In order to continue using the pnfs_layout_hdr, a full recovery
@@ -395,14 +413,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
lo->plh_retry_timestamp = jiffies;
if (!test_and_set_bit(fail_bit, &lo->plh_flags))
- atomic_inc(&lo->plh_refcount);
+ refcount_inc(&lo->plh_refcount);
}
static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
if (test_and_clear_bit(fail_bit, &lo->plh_flags))
- atomic_dec(&lo->plh_refcount);
+ refcount_dec(&lo->plh_refcount);
}
static void
@@ -450,7 +468,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
{
INIT_LIST_HEAD(&lseg->pls_list);
INIT_LIST_HEAD(&lseg->pls_lc_list);
- atomic_set(&lseg->pls_refcount, 1);
+ refcount_set(&lseg->pls_refcount, 1);
set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
lseg->pls_layout = lo;
lseg->pls_range = *range;
@@ -472,7 +490,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
- atomic_dec(&lo->plh_refcount);
+ refcount_dec(&lo->plh_refcount);
if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
return;
if (list_empty(&lo->plh_segs) &&
@@ -507,13 +525,13 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
return;
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
- atomic_read(&lseg->pls_refcount),
+ refcount_read(&lseg->pls_refcount),
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
lo = lseg->pls_layout;
inode = lo->plh_inode;
- if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+ if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
spin_unlock(&inode->i_lock);
return;
@@ -551,7 +569,7 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
{
- if (!atomic_dec_and_test(&lseg->pls_refcount))
+ if (!refcount_dec_and_test(&lseg->pls_refcount))
return false;
pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
list_add(&lseg->pls_list, tmp_list);
@@ -570,7 +588,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
* outstanding io is finished.
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
- atomic_read(&lseg->pls_refcount));
+ refcount_read(&lseg->pls_refcount));
if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
}
@@ -1451,7 +1469,7 @@ alloc_init_layout_hdr(struct inode *ino,
lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
if (!lo)
return NULL;
- atomic_set(&lo->plh_refcount, 1);
+ refcount_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->plh_layouts);
INIT_LIST_HEAD(&lo->plh_segs);
INIT_LIST_HEAD(&lo->plh_return_segs);
@@ -1513,7 +1531,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
if ((range->iomode == IOMODE_RW &&
ls_range->iomode != IOMODE_RW) ||
(range->iomode != ls_range->iomode &&
- strict_iomode == true) ||
+ strict_iomode) ||
!pnfs_lseg_range_intersecting(ls_range, range))
return 0;
@@ -1546,7 +1564,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
}
dprintk("%s:Return lseg %p ref %d\n",
- __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
+ __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
return ret;
}
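pnfs_put_layout_hdr() and pnfs_put_lseg() above depend on refcount_dec_and_lock(), which drops the final reference and takes the spinlock in one atomic step, so teardown cannot race with a lookup done under the same lock. A minimal sketch, with struct obj as an illustrative stand-in:

#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct obj {
	refcount_t ref;
	struct list_head node;
};

static void obj_put(struct obj *o, spinlock_t *lock)
{
	if (!refcount_dec_and_lock(&o->ref, lock))
		return;			/* others still hold references */
	list_del(&o->node);		/* unpublish while the lock is held */
	spin_unlock(lock);
	kfree(o);
}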
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 87f144f14d1e..8d507c361d98 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,7 @@
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
+#include <linux/refcount.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/workqueue.h>
@@ -54,7 +55,7 @@ struct nfs4_pnfs_ds {
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
struct nfs_client *ds_clp;
- atomic_t ds_count;
+ refcount_t ds_count;
unsigned long ds_state;
#define NFS4DS_CONNECTING 0 /* ds is establishing connection */
};
@@ -63,7 +64,7 @@ struct pnfs_layout_segment {
struct list_head pls_list;
struct list_head pls_lc_list;
struct pnfs_layout_range pls_range;
- atomic_t pls_refcount;
+ refcount_t pls_refcount;
u32 pls_seq;
unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout;
@@ -179,7 +180,7 @@ struct pnfs_layoutdriver_type {
};
struct pnfs_layout_hdr {
- atomic_t plh_refcount;
+ refcount_t plh_refcount;
atomic_t plh_outstanding; /* number of RPCs out */
struct list_head plh_layouts; /* other client layouts */
struct list_head plh_bulk_destroy;
@@ -251,6 +252,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
bool is_recall);
int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
bool is_recall);
+bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
@@ -393,7 +395,7 @@ static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
- atomic_inc(&lseg->pls_refcount);
+ refcount_inc(&lseg->pls_refcount);
smp_mb__after_atomic();
}
return lseg;
@@ -764,6 +766,11 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
{
}
+static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst,
+ struct inode *inode)
+{
+ return false;
+}
#endif /* CONFIG_NFS_V4_1 */
#if IS_ENABLED(CONFIG_NFS_V4_2)
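The stub added above for the !CONFIG_NFS_V4_1 case follows the usual compiled-out-feature pattern: declare the real function when the option is on, and provide a static inline no-op otherwise, so callers never need #ifdefs. A generic sketch with CONFIG_FEATURE_X as a placeholder:

#ifdef CONFIG_FEATURE_X
bool feature_x_refresh(struct inode *inode);
#else
static inline bool feature_x_refresh(struct inode *inode)
{
	return false;	/* feature compiled out, nothing refreshed */
}
#endif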
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 60da59be83b6..03aaa60c7768 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -338,7 +338,7 @@ print_ds(struct nfs4_pnfs_ds *ds)
" client %p\n"
" cl_exchange_flags %x\n",
ds->ds_remotestr,
- atomic_read(&ds->ds_count), ds->ds_clp,
+ refcount_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
@@ -451,7 +451,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
{
- if (atomic_dec_and_lock(&ds->ds_count,
+ if (refcount_dec_and_lock(&ds->ds_count,
&nfs4_ds_cache_lock)) {
list_del_init(&ds->ds_node);
spin_unlock(&nfs4_ds_cache_lock);
@@ -537,7 +537,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
- atomic_set(&ds->ds_count, 1);
+ refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
@@ -546,10 +546,10 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
} else {
kfree(remotestr);
kfree(ds);
- atomic_inc(&tmp_ds->ds_count);
+ refcount_inc(&tmp_ds->ds_count);
dprintk("%s data server %s found, inc'ed ds_count to %d\n",
__func__, tmp_ds->ds_remotestr,
- atomic_read(&tmp_ds->ds_count));
+ refcount_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
spin_unlock(&nfs4_ds_cache_lock);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c9d24bae3025..43cadb28db6e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1332,7 +1332,7 @@ static int nfs_parse_mount_options(char *raw,
mnt->options |= NFS_OPTION_MIGRATION;
break;
case Opt_nomigration:
- mnt->options &= NFS_OPTION_MIGRATION;
+ mnt->options &= ~NFS_OPTION_MIGRATION;
break;
/*
@@ -1456,18 +1456,21 @@ static int nfs_parse_mount_options(char *raw,
switch (token) {
case Opt_xprt_udp6:
protofamily = AF_INET6;
+ /* fall through */
case Opt_xprt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp6:
protofamily = AF_INET6;
+ /* fall through */
case Opt_xprt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
break;
case Opt_xprt_rdma6:
protofamily = AF_INET6;
+ /* fall through */
case Opt_xprt_rdma:
/* vector side protocols to TCP */
mnt->flags |= NFS_MOUNT_TCP;
@@ -1494,11 +1497,13 @@ static int nfs_parse_mount_options(char *raw,
switch (token) {
case Opt_xprt_udp6:
mountfamily = AF_INET6;
+ /* fall through */
case Opt_xprt_udp:
mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
break;
case Opt_xprt_tcp6:
mountfamily = AF_INET6;
+ /* fall through */
case Opt_xprt_tcp:
mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
break;
@@ -1988,9 +1993,9 @@ static int nfs23_validate_mount_data(void *options,
args->version = NFS_DEFAULT_VERSION;
switch (data->version) {
case 1:
- data->namlen = 0;
+ data->namlen = 0; /* fall through */
case 2:
- data->bsize = 0;
+ data->bsize = 0; /* fall through */
case 3:
if (data->flags & NFS_MOUNT_VER3)
goto out_no_v3;
@@ -1998,11 +2003,14 @@ static int nfs23_validate_mount_data(void *options,
memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
/* Turn off security negotiation */
extra_flags |= NFS_MOUNT_SECFLAVOUR;
+ /* fall through */
case 4:
if (data->flags & NFS_MOUNT_SECFLAVOUR)
goto out_no_sec;
+ /* fall through */
case 5:
memset(data->context, 0, sizeof(data->context));
+ /* fall through */
case 6:
if (data->flags & NFS_MOUNT_VER3) {
if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
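The nfs/super.c hunks above annotate each intentional case fall-through with a bare comment, documenting intent and quieting fall-through warnings from static checkers. A compact sketch of the annotated shape, with normalize() as an illustrative function:

static void normalize(int version, int *namlen, int *bsize)
{
	switch (version) {
	case 1:
		*namlen = 0;
		/* fall through */
	case 2:
		*bsize = 0;
		break;
	}
}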
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index babebbccae2a..5b5f464f6f2a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -487,10 +487,8 @@ try_again:
}
ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
- }
+ if (ret < 0)
+ goto release_request;
/* lock each request in the page group */
total_bytes = head->wb_bytes;
@@ -515,8 +513,7 @@ try_again:
if (ret < 0) {
nfs_unroll_locks(inode, head, subreq);
nfs_release_request(subreq);
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ goto release_request;
}
}
/*
@@ -532,8 +529,8 @@ try_again:
nfs_page_group_unlock(head);
nfs_unroll_locks(inode, head, subreq);
nfs_unlock_and_release_request(subreq);
- nfs_unlock_and_release_request(head);
- return ERR_PTR(-EIO);
+ ret = -EIO;
+ goto release_request;
}
}
@@ -576,6 +573,10 @@ try_again:
/* still holds ref on head from nfs_page_find_head_request
* and still has lock on head from lock loop */
return head;
+
+release_request:
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
}
static void nfs_write_error_remove_page(struct nfs_page *req)
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 004af348fb80..f44d5eb74fcc 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -275,12 +275,6 @@ struct orangefs_kiocb_s {
/* orangefs kernel operation type */
struct orangefs_kernel_op_s *op;
- /* The user space buffers from/to which I/O is being staged */
- struct iovec *iov;
-
- /* number of elements in the iovector */
- unsigned long nr_segs;
-
/* set to indicate the type of the operation */
int rw;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index c441f9387a1b..eb3b8d39fb61 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -22,7 +22,6 @@
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -486,6 +485,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
{
struct inode *udir = c->destdir->d_inode;
+ struct inode *inode;
struct dentry *newdentry = NULL;
struct dentry *temp = NULL;
int err;
@@ -508,7 +508,11 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
if (err)
goto out_cleanup;
- ovl_inode_update(d_inode(c->dentry), newdentry);
+ inode = d_inode(c->dentry);
+ ovl_inode_update(inode, newdentry);
+ if (S_ISDIR(inode->i_mode))
+ ovl_set_flag(OVL_WHITEOUTS, inode);
+
out:
dput(temp);
return err;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index cc961a3bd3bd..e13921824c70 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -181,6 +181,11 @@ static bool ovl_type_origin(struct dentry *dentry)
return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
}
+static bool ovl_may_have_whiteouts(struct dentry *dentry)
+{
+ return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
+}
+
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct cattr *attr, struct dentry *hardlink)
{
@@ -300,7 +305,6 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
{
int err;
struct dentry *ret = NULL;
- enum ovl_path_type type = ovl_path_type(dentry);
LIST_HEAD(list);
err = ovl_check_empty_dir(dentry, &list);
@@ -313,13 +317,13 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
* When removing an empty opaque directory, it makes no sense to
* replace it with an exact replica of itself.
*
- * If no upperdentry then skip clearing whiteouts.
+ * If upperdentry has whiteouts, clear them.
*
* Can race with copy-up, since we don't hold the upperdir mutex.
* Doesn't matter, since copy-up can't create a non-empty directory
* from an empty one.
*/
- if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
+ if (!list_empty(&list))
ret = ovl_clear_empty(dentry, &list);
out_free:
@@ -698,8 +702,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
struct dentry *opaquedir = NULL;
int err;
- /* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
- if (is_dir && ovl_dentry_get_redirect(dentry)) {
+ /* Redirect/origin dir can be !ovl_lower_positive && not clean */
+ if (is_dir && (ovl_dentry_get_redirect(dentry) ||
+ ovl_may_have_whiteouts(dentry))) {
opaquedir = ovl_check_empty_and_clear(dentry);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
@@ -946,7 +951,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
old_cred = ovl_override_creds(old->d_sb);
- if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
+ if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) ||
+ ovl_may_have_whiteouts(new))) {
opaquedir = ovl_check_empty_and_clear(new);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir)) {
@@ -1069,9 +1075,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
drop_nlink(d_inode(new));
}
- ovl_dentry_version_inc(old->d_parent,
- !overwrite && ovl_type_origin(new));
- ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old));
+ ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
+ (!overwrite && ovl_type_origin(new)));
+ ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
+ (d_inode(new) && ovl_type_origin(new)));
out_dput:
dput(newdentry);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 321511ed8c42..00b6b294272a 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -15,6 +15,14 @@
#include <linux/ratelimit.h>
#include "overlayfs.h"
+
+static dev_t ovl_get_pseudo_dev(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->lowerstack[0].layer->pseudo_dev;
+}
+
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
int err;
@@ -66,6 +74,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
struct path realpath;
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
+ bool samefs = ovl_same_sb(dentry->d_sb);
int err;
type = ovl_path_real(dentry, &realpath);
@@ -75,16 +84,13 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
goto out;
/*
- * When all layers are on the same fs, all real inode number are
- * unique, so we use the overlay st_dev, which is friendly to du -x.
- *
- * We also use st_ino of the copy up origin, if we know it.
- * This guaranties constant st_dev/st_ino across copy up.
+ * For non-dir or same fs, we use st_ino of the copy up origin, if we
+ * know it. This guarantees constant st_dev/st_ino across copy up.
*
* If filesystem supports NFS export ops, this also guarantees
* persistent st_ino across mount cycle.
*/
- if (ovl_same_sb(dentry->d_sb)) {
+ if (!is_dir || samefs) {
if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
@@ -95,7 +101,6 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (err)
goto out;
- WARN_ON_ONCE(stat->dev != lowerstat.dev);
/*
* Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
@@ -107,17 +112,36 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (is_dir || lowerstat.nlink == 1 ||
ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->ino = lowerstat.ino;
+
+ if (samefs)
+ WARN_ON_ONCE(stat->dev != lowerstat.dev);
+ else
+ stat->dev = ovl_get_pseudo_dev(dentry);
}
- stat->dev = dentry->d_sb->s_dev;
- } else if (is_dir) {
+ if (samefs) {
+ /*
+ * When all layers are on the same fs, all real inode
+ * number are unique, so we use the overlay st_dev,
+ * which is friendly to du -x.
+ */
+ stat->dev = dentry->d_sb->s_dev;
+ } else if (!OVL_TYPE_UPPER(type)) {
+ /*
+ * For a non-samefs setup, to make sure the st_dev/st_ino
+ * pair is unique across the system, we use a unique
+ * anonymous st_dev for each lower layer inode.
+ */
+ stat->dev = ovl_get_pseudo_dev(dentry);
+ }
+ } else {
/*
- * If not all layers are on the same fs the pair {real st_ino;
- * overlay st_dev} is not unique, so use the non persistent
- * overlay st_ino.
- *
* Always use the overlay st_dev for directories, so 'find
* -xdev' will scan the entire overlay mount and won't cross the
* overlay mount boundaries.
+ *
+ * If not all layers are on the same fs the pair {real st_ino;
+ * overlay st_dev} is not unique, so use the non persistent
+ * overlay st_ino for directories.
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
@@ -409,6 +433,7 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
#ifdef CONFIG_LOCKDEP
static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+ static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];
int depth = inode->i_sb->s_stack_depth - 1;
@@ -419,6 +444,8 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
else
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+
+ lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
#endif
}
@@ -657,6 +684,16 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
+ /* Check for non-merge dir that may have whiteouts */
+ if (S_ISDIR(realinode->i_mode)) {
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
+ ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
+ ovl_set_flag(OVL_WHITEOUTS, inode);
+ }
+ }
+
if (inode->i_state & I_NEW)
unlock_new_inode(inode);
out:
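ovl_get_pseudo_dev() above reports a per-layer anonymous device number for lower inodes in the non-samefs case; these are minted with get_anon_bdev() when the layers are set up (see ovl_get_lower_layers() in the super.c hunks below). A minimal sketch, with struct layer standing in for struct ovl_layer:

#include <linux/fs.h>

struct layer {
	dev_t pseudo_dev;
};

static int layer_init(struct layer *l)
{
	/* allocate a unique anonymous dev_t to report as st_dev */
	return get_anon_bdev(&l->pseudo_dev);
}

static void layer_exit(struct layer *l)
{
	free_anon_bdev(l->pseudo_dev);
}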
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index a12dc10bf726..625ed8066570 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -15,7 +15,6 @@
#include <linux/mount.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
struct ovl_lookup_data {
struct qstr name;
@@ -286,16 +285,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
static int ovl_check_origin(struct dentry *upperdentry,
- struct path *lowerstack, unsigned int numlower,
- struct path **stackp, unsigned int *ctrp)
+ struct ovl_path *lower, unsigned int numlower,
+ struct ovl_path **stackp, unsigned int *ctrp)
{
struct vfsmount *mnt;
struct dentry *origin = NULL;
int i;
-
for (i = 0; i < numlower; i++) {
- mnt = lowerstack[i].mnt;
+ mnt = lower[i].layer->mnt;
origin = ovl_get_origin(upperdentry, mnt);
if (IS_ERR(origin))
return PTR_ERR(origin);
@@ -309,12 +307,12 @@ static int ovl_check_origin(struct dentry *upperdentry,
BUG_ON(*ctrp);
if (!*stackp)
- *stackp = kmalloc(sizeof(struct path), GFP_KERNEL);
+ *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
if (!*stackp) {
dput(origin);
return -ENOMEM;
}
- **stackp = (struct path) { .dentry = origin, .mnt = mnt };
+ **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer};
*ctrp = 1;
return 0;
@@ -350,8 +348,8 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
*
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
- struct dentry *origin, bool is_upper, bool set)
+int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
+ bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
@@ -384,13 +382,13 @@ fail:
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
* Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
*/
-int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
unsigned int numlower)
{
struct ovl_fh *fh = NULL;
size_t len;
- struct path origin = { };
- struct path *stack = &origin;
+ struct ovl_path origin = { };
+ struct ovl_path *stack = &origin;
unsigned int ctr = 0;
int err;
@@ -429,7 +427,7 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack,
if (err)
goto fail;
- err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
+ err = ovl_check_origin(index, lower, numlower, &stack, &ctr);
if (!err && !ctr)
err = -ESTALE;
if (err)
@@ -568,11 +566,24 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
idx++;
}
BUG_ON(idx > oe->numlower);
- *path = oe->lowerstack[idx - 1];
+ path->dentry = oe->lowerstack[idx - 1].dentry;
+ path->mnt = oe->lowerstack[idx - 1].layer->mnt;
return (idx < oe->numlower) ? idx + 1 : -1;
}
+static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path)
+{
+ int i;
+
+ for (i = 0; i < ofs->numlower; i++) {
+ if (ofs->lower_layers[i].mnt == path->layer->mnt)
+ break;
+ }
+
+ return i;
+}
+
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -581,7 +592,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
- struct path *stack = NULL;
+ struct ovl_path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
struct dentry *index = NULL;
unsigned int ctr = 0;
@@ -630,7 +641,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = ovl_check_origin(upperdentry, roe->lowerstack,
roe->numlower, &stack, &ctr);
if (err)
- goto out;
+ goto out_put_upper;
}
if (d.redirect) {
@@ -646,17 +657,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!d.stop && poe->numlower) {
err = -ENOMEM;
- stack = kcalloc(ofs->numlower, sizeof(struct path),
+ stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
GFP_KERNEL);
if (!stack)
goto out_put_upper;
}
for (i = 0; !d.stop && i < poe->numlower; i++) {
- struct path lowerpath = poe->lowerstack[i];
+ struct ovl_path lower = poe->lowerstack[i];
d.last = i == poe->numlower - 1;
- err = ovl_lookup_layer(lowerpath.dentry, &d, &this);
+ err = ovl_lookup_layer(lower.dentry, &d, &this);
if (err)
goto out_put;
@@ -664,7 +675,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
continue;
stack[ctr].dentry = this;
- stack[ctr].mnt = lowerpath.mnt;
+ stack[ctr].layer = lower.layer;
ctr++;
if (d.stop)
@@ -674,10 +685,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
poe = roe;
/* Find the current layer on the root dentry */
- for (i = 0; i < poe->numlower; i++)
- if (poe->lowerstack[i].mnt == lowerpath.mnt)
- break;
- if (WARN_ON(i == poe->numlower))
+ i = ovl_find_layer(ofs, &lower);
+ if (WARN_ON(i == ofs->numlower))
break;
}
}
@@ -700,7 +709,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out_put;
oe->opaque = upperopaque;
- memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+ memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
dentry->d_fsdata = oe;
if (upperdentry)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d9a0edd4e57e..13eab09a6b6f 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include "ovl_entry.h"
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
@@ -28,7 +29,10 @@ enum ovl_path_type {
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
enum ovl_flag {
+ /* Pure upper dir that may contain non pure upper entries */
OVL_IMPURE,
+ /* Non-merge dir that may contain whiteout entries */
+ OVL_WHITEOUTS,
OVL_INDEX,
};
@@ -223,6 +227,7 @@ bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry);
void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_origin_xattr(struct dentry *dentry);
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
const char *name, const void *value, size_t size,
@@ -244,9 +249,9 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
/* namei.c */
-int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
- struct dentry *origin, bool is_upper, bool set);
-int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
+ bool is_upper, bool set);
+int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
unsigned int numlower);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -263,7 +268,7 @@ int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
- struct path *lowerstack, unsigned int numlower);
+ struct ovl_path *lower, unsigned int numlower);
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 36b49bd09264..752bab645879 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,11 +17,21 @@ struct ovl_config {
bool index;
};
+struct ovl_layer {
+ struct vfsmount *mnt;
+ dev_t pseudo_dev;
+};
+
+struct ovl_path {
+ struct ovl_layer *layer;
+ struct dentry *dentry;
+};
+
/* private information held for overlayfs's superblock */
struct ovl_fs {
struct vfsmount *upper_mnt;
unsigned numlower;
- struct vfsmount **lower_mnt;
+ struct ovl_layer *lower_layers;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
@@ -52,7 +62,7 @@ struct ovl_entry {
struct rcu_head rcu;
};
unsigned numlower;
- struct path lowerstack[];
+ struct ovl_path lowerstack[];
};
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
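With the lower stack now holding struct ovl_path instead of struct path, recovering a VFS path takes one extra hop through the shared layer, exactly as the ovl_path_next() hunk does. A one-function sketch (ovl_path_from_lower is an illustrative name):

static void ovl_path_from_lower(struct path *p, const struct ovl_path *op)
{
	p->dentry = op->dentry;
	p->mnt = op->layer->mnt;	/* the mount now lives in the shared layer */
}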
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index c310e3ff7f3f..0daa4354fec4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -26,6 +26,7 @@ struct ovl_cache_entry {
struct list_head l_node;
struct rb_node node;
struct ovl_cache_entry *next_maybe_whiteout;
+ bool is_upper;
bool is_whiteout;
char name[];
};
@@ -158,6 +159,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
/* Defer setting d_ino for upper entry to ovl_iterate() */
if (ovl_calc_d_ino(rdd, p))
p->ino = 0;
+ p->is_upper = rdd->is_upper;
p->is_whiteout = false;
if (d_type == DT_CHR) {
@@ -316,21 +318,37 @@ static inline int ovl_dir_read(struct path *realpath,
return err;
}
+/*
+ * Can we iterate real dir directly?
+ *
+ * Non-merge dir may contain whiteouts from a time when it was a merge upper, before
+ * lower dir was removed under it and possibly before it was rotated from upper
+ * to lower layer.
+ */
+static bool ovl_dir_is_real(struct dentry *dir)
+{
+ return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
+}
+
static void ovl_dir_reset(struct file *file)
{
struct ovl_dir_file *od = file->private_data;
struct ovl_dir_cache *cache = od->cache;
struct dentry *dentry = file->f_path.dentry;
- enum ovl_path_type type = ovl_path_type(dentry);
+ bool is_real;
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
ovl_cache_put(od, dentry);
od->cache = NULL;
od->cursor = NULL;
}
- WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
- if (od->is_real && OVL_TYPE_MERGE(type))
+ is_real = ovl_dir_is_real(dentry);
+ if (od->is_real != is_real) {
+ /* is_real can only become false when dir is copied up */
+ if (WARN_ON(is_real))
+ return;
od->is_real = false;
+ }
}
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
@@ -816,7 +834,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
return PTR_ERR(realfile);
}
od->realfile = realfile;
- od->is_real = !OVL_TYPE_MERGE(type);
+ od->is_real = ovl_dir_is_real(file->f_path.dentry);
od->is_upper = OVL_TYPE_UPPER(type);
file->private_data = od;
@@ -835,7 +853,7 @@ const struct file_operations ovl_dir_operations = {
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
int err;
- struct ovl_cache_entry *p;
+ struct ovl_cache_entry *p, *n;
struct rb_root root = RB_ROOT;
err = ovl_dir_read_merged(dentry, list, &root);
@@ -844,18 +862,29 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
err = 0;
- list_for_each_entry(p, list, l_node) {
- if (p->is_whiteout)
- continue;
+ list_for_each_entry_safe(p, n, list, l_node) {
+ /*
+ * Select whiteouts in upperdir, they should
+ * be cleared when deleting this directory.
+ */
+ if (p->is_whiteout) {
+ if (p->is_upper)
+ continue;
+ goto del_entry;
+ }
if (p->name[0] == '.') {
if (p->len == 1)
- continue;
+ goto del_entry;
if (p->len == 2 && p->name[1] == '.')
- continue;
+ goto del_entry;
}
err = -ENOTEMPTY;
break;
+
+del_entry:
+ list_del(&p->l_node);
+ kfree(p);
}
return err;
@@ -869,7 +898,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
- if (!p->is_whiteout)
+ if (WARN_ON(!p->is_whiteout || !p->is_upper))
continue;
dentry = lookup_one_len(p->name, upper, p->len);
@@ -985,7 +1014,7 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
}
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
- struct path *lowerstack, unsigned int numlower)
+ struct ovl_path *lower, unsigned int numlower)
{
int err;
struct dentry *index = NULL;
@@ -1020,7 +1049,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
index = NULL;
break;
}
- err = ovl_verify_index(index, lowerstack, numlower);
+ err = ovl_verify_index(index, lower, numlower);
/* Cleanup stale and orphan index entries */
if (err && (err == -ESTALE || err == -ENOENT))
err = ovl_cleanup(dir, index);
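The ovl_check_empty_dir() hunk above frees cache entries while walking the list, which is why it switches to list_for_each_entry_safe(): the _safe variant caches the next node up front so the current entry can be deleted mid-walk. A standalone sketch with struct item as an illustrative element:

#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct list_head node;
	bool keep;
};

static void prune(struct list_head *list)
{
	struct item *p, *n;

	list_for_each_entry_safe(p, n, list, node) {
		if (p->keep)
			continue;
		list_del(&p->node);	/* safe: n already points past p */
		kfree(p);
	}
}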
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f5738e96a052..be03578181d2 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -18,7 +18,6 @@
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
@@ -39,15 +38,20 @@ module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(ovl_index_def,
"Default to on or off for the inodes index feature");
+static void ovl_entry_stack_free(struct ovl_entry *oe)
+{
+ unsigned int i;
+
+ for (i = 0; i < oe->numlower; i++)
+ dput(oe->lowerstack[i].dentry);
+}
+
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
if (oe) {
- unsigned int i;
-
- for (i = 0; i < oe->numlower; i++)
- dput(oe->lowerstack[i].dentry);
+ ovl_entry_stack_free(oe);
kfree_rcu(oe, rcu);
}
}
@@ -207,39 +211,48 @@ static void ovl_destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, ovl_i_callback);
}
-static void ovl_put_super(struct super_block *sb)
+static void ovl_free_fs(struct ovl_fs *ofs)
{
- struct ovl_fs *ufs = sb->s_fs_info;
unsigned i;
- dput(ufs->indexdir);
- dput(ufs->workdir);
- if (ufs->workdir_locked)
- ovl_inuse_unlock(ufs->workbasedir);
- dput(ufs->workbasedir);
- if (ufs->upper_mnt && ufs->upperdir_locked)
- ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
- mntput(ufs->upper_mnt);
- for (i = 0; i < ufs->numlower; i++)
- mntput(ufs->lower_mnt[i]);
- kfree(ufs->lower_mnt);
-
- kfree(ufs->config.lowerdir);
- kfree(ufs->config.upperdir);
- kfree(ufs->config.workdir);
- put_cred(ufs->creator_cred);
- kfree(ufs);
+ dput(ofs->indexdir);
+ dput(ofs->workdir);
+ if (ofs->workdir_locked)
+ ovl_inuse_unlock(ofs->workbasedir);
+ dput(ofs->workbasedir);
+ if (ofs->upperdir_locked)
+ ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
+ mntput(ofs->upper_mnt);
+ for (i = 0; i < ofs->numlower; i++) {
+ mntput(ofs->lower_layers[i].mnt);
+ free_anon_bdev(ofs->lower_layers[i].pseudo_dev);
+ }
+ kfree(ofs->lower_layers);
+
+ kfree(ofs->config.lowerdir);
+ kfree(ofs->config.upperdir);
+ kfree(ofs->config.workdir);
+ if (ofs->creator_cred)
+ put_cred(ofs->creator_cred);
+ kfree(ofs);
+}
+
+static void ovl_put_super(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ ovl_free_fs(ofs);
}
static int ovl_sync_fs(struct super_block *sb, int wait)
{
- struct ovl_fs *ufs = sb->s_fs_info;
+ struct ovl_fs *ofs = sb->s_fs_info;
struct super_block *upper_sb;
int ret;
- if (!ufs->upper_mnt)
+ if (!ofs->upper_mnt)
return 0;
- upper_sb = ufs->upper_mnt->mnt_sb;
+ upper_sb = ofs->upper_mnt->mnt_sb;
if (!upper_sb->s_op->sync_fs)
return 0;
@@ -277,9 +290,9 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
}
/* Will this overlay be forced to mount/remount ro? */
-static bool ovl_force_readonly(struct ovl_fs *ufs)
+static bool ovl_force_readonly(struct ovl_fs *ofs)
{
- return (!ufs->upper_mnt || !ufs->workdir);
+ return (!ofs->upper_mnt || !ofs->workdir);
}
/**
@@ -291,29 +304,29 @@ static bool ovl_force_readonly(struct ovl_fs *ufs)
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
struct super_block *sb = dentry->d_sb;
- struct ovl_fs *ufs = sb->s_fs_info;
+ struct ovl_fs *ofs = sb->s_fs_info;
- seq_show_option(m, "lowerdir", ufs->config.lowerdir);
- if (ufs->config.upperdir) {
- seq_show_option(m, "upperdir", ufs->config.upperdir);
- seq_show_option(m, "workdir", ufs->config.workdir);
+ seq_show_option(m, "lowerdir", ofs->config.lowerdir);
+ if (ofs->config.upperdir) {
+ seq_show_option(m, "upperdir", ofs->config.upperdir);
+ seq_show_option(m, "workdir", ofs->config.workdir);
}
- if (ufs->config.default_permissions)
+ if (ofs->config.default_permissions)
seq_puts(m, ",default_permissions");
- if (ufs->config.redirect_dir != ovl_redirect_dir_def)
+ if (ofs->config.redirect_dir != ovl_redirect_dir_def)
seq_printf(m, ",redirect_dir=%s",
- ufs->config.redirect_dir ? "on" : "off");
- if (ufs->config.index != ovl_index_def)
+ ofs->config.redirect_dir ? "on" : "off");
+ if (ofs->config.index != ovl_index_def)
seq_printf(m, ",index=%s",
- ufs->config.index ? "on" : "off");
+ ofs->config.index ? "on" : "off");
return 0;
}
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
- struct ovl_fs *ufs = sb->s_fs_info;
+ struct ovl_fs *ofs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
+ if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs))
return -EROFS;
return 0;
@@ -451,13 +464,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
-static struct dentry *ovl_workdir_create(struct super_block *sb,
- struct ovl_fs *ufs,
- struct dentry *dentry,
+static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
const char *name, bool persist)
{
- struct inode *dir = dentry->d_inode;
- struct vfsmount *mnt = ufs->upper_mnt;
+ struct inode *dir = ofs->workbasedir->d_inode;
+ struct vfsmount *mnt = ofs->upper_mnt;
struct dentry *work;
int err;
bool retried = false;
@@ -471,7 +482,7 @@ static struct dentry *ovl_workdir_create(struct super_block *sb,
locked = true;
retry:
- work = lookup_one_len(name, dentry, strlen(name));
+ work = lookup_one_len(name, ofs->workbasedir, strlen(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -541,8 +552,7 @@ out_dput:
dput(work);
out_err:
pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
- ufs->config.workdir, name, -err);
- sb->s_flags |= MS_RDONLY;
+ ofs->config.workdir, name, -err);
work = NULL;
goto out_unlock;
}
@@ -585,7 +595,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
return 0;
out_put:
- path_put(path);
+ path_put_init(path);
out:
return err;
}
@@ -603,7 +613,7 @@ static int ovl_mount_dir(const char *name, struct path *path)
if (ovl_dentry_remote(path->dentry)) {
pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
tmp);
- path_put(path);
+ path_put_init(path);
err = -EINVAL;
}
kfree(tmp);
@@ -655,7 +665,7 @@ static int ovl_lower_dir(const char *name, struct path *path,
return 0;
out_put:
- path_put(path);
+ path_put_init(path);
out:
return err;
}
@@ -826,129 +836,269 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
NULL
};
-static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
{
- struct path upperpath = { };
- struct path workpath = { };
- struct dentry *root_dentry;
- struct ovl_entry *oe;
- struct ovl_fs *ufs;
- struct path *stack = NULL;
- char *lowertmp;
- char *lower;
- unsigned int numlower;
- unsigned int stacklen = 0;
- unsigned int i;
- bool remote = false;
- struct cred *cred;
+ struct vfsmount *upper_mnt;
int err;
- err = -ENOMEM;
- ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
- if (!ufs)
+ err = ovl_mount_dir(ofs->config.upperdir, upperpath);
+ if (err)
goto out;
- ufs->config.redirect_dir = ovl_redirect_dir_def;
- ufs->config.index = ovl_index_def;
- err = ovl_parse_opt((char *) data, &ufs->config);
+ /* Upper fs should not be r/o */
+ if (sb_rdonly(upperpath->mnt->mnt_sb)) {
+ pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
if (err)
- goto out_free_config;
+ goto out;
+
+ err = -EBUSY;
+ if (ovl_inuse_trylock(upperpath->dentry)) {
+ ofs->upperdir_locked = true;
+ } else if (ofs->config.index) {
+ pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
+ goto out;
+ } else {
+ pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ }
+
+ upper_mnt = clone_private_mount(upperpath);
+ err = PTR_ERR(upper_mnt);
+ if (IS_ERR(upper_mnt)) {
+ pr_err("overlayfs: failed to clone upperpath\n");
+ goto out;
+ }
+
+ /* Don't inherit atime flags */
+ upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
+ ofs->upper_mnt = upper_mnt;
+ err = 0;
+out:
+ return err;
+}
+
+static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
+{
+ struct dentry *temp;
+ int err;
+
+ ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+ if (!ofs->workdir)
+ return 0;
+
+ /*
+ * Upper should support d_type, else whiteouts are visible. Given
+ * workdir and upper are on same fs, we can do iterate_dir() on
+ * workdir. This check requires successful creation of workdir in
+ * previous step.
+ */
+ err = ovl_check_d_type_supported(workpath);
+ if (err < 0)
+ return err;
+
+ /*
+ * We allowed this configuration and don't want to break users over
+ * kernel upgrade. So warn instead of erroring out.
+ */
+ if (!err)
+ pr_warn("overlayfs: upper fs needs to support d_type.\n");
+
+ /* Check if upper/work fs supports O_TMPFILE */
+ temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
+ ofs->tmpfile = !IS_ERR(temp);
+ if (ofs->tmpfile)
+ dput(temp);
+ else
+ pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+ /*
+ * Check if upper/work fs supports trusted.overlay.* xattr
+ */
+ err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
+ if (err) {
+ ofs->noxattr = true;
+ pr_warn("overlayfs: upper fs does not support xattr.\n");
+ } else {
+ vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
+ }
+
+ /* Check if upper/work fs supports file handles */
+ if (ofs->config.index &&
+ !ovl_can_decode_fh(ofs->workdir->d_sb)) {
+ ofs->config.index = false;
+ pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+ }
+
+ return 0;
+}
+
+static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
+{
+ int err;
+ struct path workpath = { };
+
+ err = ovl_mount_dir(ofs->config.workdir, &workpath);
+ if (err)
+ goto out;
err = -EINVAL;
- if (!ufs->config.lowerdir) {
- if (!silent)
- pr_err("overlayfs: missing 'lowerdir'\n");
- goto out_free_config;
+ if (upperpath->mnt != workpath.mnt) {
+ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+ goto out;
+ }
+ if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
+ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+ goto out;
}
- sb->s_stack_depth = 0;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- if (ufs->config.upperdir) {
- if (!ufs->config.workdir) {
- pr_err("overlayfs: missing 'workdir'\n");
- goto out_free_config;
- }
+ err = -EBUSY;
+ if (ovl_inuse_trylock(workpath.dentry)) {
+ ofs->workdir_locked = true;
+ } else if (ofs->config.index) {
+ pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
+ goto out;
+ } else {
+ pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ }
- err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
- if (err)
- goto out_free_config;
+ ofs->workbasedir = dget(workpath.dentry);
+ err = ovl_make_workdir(ofs, &workpath);
+ if (err)
+ goto out;
- /* Upper fs should not be r/o */
- if (sb_rdonly(upperpath.mnt->mnt_sb)) {
- pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
- err = -EINVAL;
- goto out_put_upperpath;
- }
+ err = 0;
+out:
+ path_put(&workpath);
- err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir);
- if (err)
- goto out_put_upperpath;
-
- err = -EBUSY;
- if (ovl_inuse_trylock(upperpath.dentry)) {
- ufs->upperdir_locked = true;
- } else if (ufs->config.index) {
- pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
- goto out_put_upperpath;
- } else {
- pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
- }
+ return err;
+}
+
+static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
+ struct path *upperpath)
+{
+ int err;
- err = ovl_mount_dir(ufs->config.workdir, &workpath);
+ /* Verify lower root is upper root origin */
+ err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
+ false, true);
+ if (err) {
+ pr_err("overlayfs: failed to verify upper root origin\n");
+ goto out;
+ }
+
+ ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+ if (ofs->indexdir) {
+ /* Verify upper root is index dir origin */
+ err = ovl_verify_origin(ofs->indexdir, upperpath->dentry,
+ true, true);
if (err)
- goto out_unlock_upperdentry;
+ pr_err("overlayfs: failed to verify index dir origin\n");
- err = -EINVAL;
- if (upperpath.mnt != workpath.mnt) {
- pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
- goto out_put_workpath;
- }
- if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
- pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
- goto out_put_workpath;
+ /* Cleanup bad/stale/orphan index entries */
+ if (!err)
+ err = ovl_indexdir_cleanup(ofs->indexdir,
+ ofs->upper_mnt,
+ oe->lowerstack,
+ oe->numlower);
+ }
+ if (err || !ofs->indexdir)
+ pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+
+out:
+ return err;
+}
+
+static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
+ unsigned int numlower)
+{
+ int err;
+ unsigned int i;
+
+ err = -ENOMEM;
+ ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
+ GFP_KERNEL);
+ if (ofs->lower_layers == NULL)
+ goto out;
+ for (i = 0; i < numlower; i++) {
+ struct vfsmount *mnt;
+ dev_t dev;
+
+ err = get_anon_bdev(&dev);
+ if (err) {
+ pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+ goto out;
}
- err = -EBUSY;
- if (ovl_inuse_trylock(workpath.dentry)) {
- ufs->workdir_locked = true;
- } else if (ufs->config.index) {
- pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
- goto out_put_workpath;
- } else {
- pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ mnt = clone_private_mount(&stack[i]);
+ err = PTR_ERR(mnt);
+ if (IS_ERR(mnt)) {
+ pr_err("overlayfs: failed to clone lowerpath\n");
+ free_anon_bdev(dev);
+ goto out;
}
+ /*
+ * Make lower layers R/O. That way fchmod/fchown on lower file
+ * will fail instead of modifying lower fs.
+ */
+ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
- ufs->workbasedir = workpath.dentry;
- sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
+ ofs->lower_layers[ofs->numlower].mnt = mnt;
+ ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
+ ofs->numlower++;
+
+ /* Check if all lower layers are on same sb */
+ if (i == 0)
+ ofs->same_sb = mnt->mnt_sb;
+ else if (ofs->same_sb != mnt->mnt_sb)
+ ofs->same_sb = NULL;
}
+ err = 0;
+out:
+ return err;
+}
+
+static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
+ struct ovl_fs *ofs)
+{
+ int err;
+ char *lowertmp, *lower;
+ struct path *stack = NULL;
+ unsigned int stacklen, numlower = 0, i;
+ bool remote = false;
+ struct ovl_entry *oe;
+
err = -ENOMEM;
- lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
+ lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
if (!lowertmp)
- goto out_unlock_workdentry;
+ goto out_err;
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
if (stacklen > OVL_MAX_STACK) {
pr_err("overlayfs: too many lower directories, limit is %d\n",
OVL_MAX_STACK);
- goto out_free_lowertmp;
- } else if (!ufs->config.upperdir && stacklen == 1) {
+ goto out_err;
+ } else if (!ofs->config.upperdir && stacklen == 1) {
pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
- goto out_free_lowertmp;
+ goto out_err;
}
err = -ENOMEM;
stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
if (!stack)
- goto out_free_lowertmp;
+ goto out_err;
err = -EINVAL;
lower = lowertmp;
for (numlower = 0; numlower < stacklen; numlower++) {
- err = ovl_lower_dir(lower, &stack[numlower], ufs,
+ err = ovl_lower_dir(lower, &stack[numlower], ofs,
&sb->s_stack_depth, &remote);
if (err)
- goto out_put_lowerpath;
+ goto out_err;
lower = strchr(lower, '\0') + 1;
}
@@ -957,190 +1107,144 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_stack_depth++;
if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
pr_err("overlayfs: maximum fs stacking depth exceeded\n");
- goto out_put_lowerpath;
+ goto out_err;
}
- if (ufs->config.upperdir) {
- ufs->upper_mnt = clone_private_mount(&upperpath);
- err = PTR_ERR(ufs->upper_mnt);
- if (IS_ERR(ufs->upper_mnt)) {
- pr_err("overlayfs: failed to clone upperpath\n");
- goto out_put_lowerpath;
- }
+ err = ovl_get_lower_layers(ofs, stack, numlower);
+ if (err)
+ goto out_err;
+
+ err = -ENOMEM;
+ oe = ovl_alloc_entry(numlower);
+ if (!oe)
+ goto out_err;
+
+ for (i = 0; i < numlower; i++) {
+ oe->lowerstack[i].dentry = dget(stack[i].dentry);
+ oe->lowerstack[i].layer = &ofs->lower_layers[i];
+ }
- /* Don't inherit atime flags */
- ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
+ if (remote)
+ sb->s_d_op = &ovl_reval_dentry_operations;
+ else
+ sb->s_d_op = &ovl_dentry_operations;
- sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
+out:
+ for (i = 0; i < numlower; i++)
+ path_put(&stack[i]);
+ kfree(stack);
+ kfree(lowertmp);
- ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
- OVL_WORKDIR_NAME, false);
- /*
- * Upper should support d_type, else whiteouts are visible.
- * Given workdir and upper are on same fs, we can do
- * iterate_dir() on workdir. This check requires successful
- * creation of workdir in previous step.
- */
- if (ufs->workdir) {
- struct dentry *temp;
-
- err = ovl_check_d_type_supported(&workpath);
- if (err < 0)
- goto out_put_workdir;
-
- /*
- * We allowed this configuration and don't want to
- * break users over kernel upgrade. So warn instead
- * of erroring out.
- */
- if (!err)
- pr_warn("overlayfs: upper fs needs to support d_type.\n");
-
- /* Check if upper/work fs supports O_TMPFILE */
- temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0);
- ufs->tmpfile = !IS_ERR(temp);
- if (ufs->tmpfile)
- dput(temp);
- else
- pr_warn("overlayfs: upper fs does not support tmpfile.\n");
-
- /*
- * Check if upper/work fs supports trusted.overlay.*
- * xattr
- */
- err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
- "0", 1, 0);
- if (err) {
- ufs->noxattr = true;
- pr_warn("overlayfs: upper fs does not support xattr.\n");
- } else {
- vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
- }
+ return oe;
- /* Check if upper/work fs supports file handles */
- if (ufs->config.index &&
- !ovl_can_decode_fh(ufs->workdir->d_sb)) {
- ufs->config.index = false;
- pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
- }
- }
- }
+out_err:
+ oe = ERR_PTR(err);
+ goto out;
+}
+
+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct path upperpath = { };
+ struct dentry *root_dentry;
+ struct ovl_entry *oe;
+ struct ovl_fs *ofs;
+ struct cred *cred;
+ int err;
err = -ENOMEM;
- ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL);
- if (ufs->lower_mnt == NULL)
- goto out_put_workdir;
- for (i = 0; i < numlower; i++) {
- struct vfsmount *mnt = clone_private_mount(&stack[i]);
+ ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+ if (!ofs)
+ goto out;
- err = PTR_ERR(mnt);
- if (IS_ERR(mnt)) {
- pr_err("overlayfs: failed to clone lowerpath\n");
- goto out_put_lower_mnt;
- }
- /*
- * Make lower_mnt R/O. That way fchmod/fchown on lower file
- * will fail instead of modifying lower fs.
- */
- mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
+ ofs->creator_cred = cred = prepare_creds();
+ if (!cred)
+ goto out_err;
- ufs->lower_mnt[ufs->numlower] = mnt;
- ufs->numlower++;
+ ofs->config.redirect_dir = ovl_redirect_dir_def;
+ ofs->config.index = ovl_index_def;
+ err = ovl_parse_opt((char *) data, &ofs->config);
+ if (err)
+ goto out_err;
- /* Check if all lower layers are on same sb */
- if (i == 0)
- ufs->same_sb = mnt->mnt_sb;
- else if (ufs->same_sb != mnt->mnt_sb)
- ufs->same_sb = NULL;
+ err = -EINVAL;
+ if (!ofs->config.lowerdir) {
+ if (!silent)
+ pr_err("overlayfs: missing 'lowerdir'\n");
+ goto out_err;
}
- /* If the upper fs is nonexistent, we mark overlayfs r/o too */
- if (!ufs->upper_mnt)
- sb->s_flags |= MS_RDONLY;
- else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
- ufs->same_sb = NULL;
-
- if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
- /* Verify lower root is upper root origin */
- err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
- stack[0].dentry, false, true);
- if (err) {
- pr_err("overlayfs: failed to verify upper root origin\n");
- goto out_put_lower_mnt;
+ sb->s_stack_depth = 0;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ if (ofs->config.upperdir) {
+ if (!ofs->config.workdir) {
+ pr_err("overlayfs: missing 'workdir'\n");
+ goto out_err;
}
- ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
- OVL_INDEXDIR_NAME, true);
- if (ufs->indexdir) {
- /* Verify upper root is index dir origin */
- err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
- upperpath.dentry, true, true);
- if (err)
- pr_err("overlayfs: failed to verify index dir origin\n");
+ err = ovl_get_upper(ofs, &upperpath);
+ if (err)
+ goto out_err;
- /* Cleanup bad/stale/orphan index entries */
- if (!err)
- err = ovl_indexdir_cleanup(ufs->indexdir,
- ufs->upper_mnt,
- stack, numlower);
- }
- if (err || !ufs->indexdir)
- pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+ err = ovl_get_workdir(ofs, &upperpath);
if (err)
- goto out_put_indexdir;
+ goto out_err;
+
+ if (!ofs->workdir)
+ sb->s_flags |= MS_RDONLY;
+
+ sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
+ sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
+
}
+ oe = ovl_get_lowerstack(sb, ofs);
+ err = PTR_ERR(oe);
+ if (IS_ERR(oe))
+ goto out_err;
- /* Show index=off/on in /proc/mounts for any of the reasons above */
- if (!ufs->indexdir)
- ufs->config.index = false;
+ /* If the upper fs is nonexistent, we mark overlayfs r/o too */
+ if (!ofs->upper_mnt)
+ sb->s_flags |= MS_RDONLY;
+ else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
+ ofs->same_sb = NULL;
- if (remote)
- sb->s_d_op = &ovl_reval_dentry_operations;
- else
- sb->s_d_op = &ovl_dentry_operations;
+ if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
+ err = ovl_get_indexdir(ofs, oe, &upperpath);
+ if (err)
+ goto out_free_oe;
- err = -ENOMEM;
- ufs->creator_cred = cred = prepare_creds();
- if (!cred)
- goto out_put_indexdir;
+ if (!ofs->indexdir)
+ sb->s_flags |= MS_RDONLY;
+ }
+
+ /* Show index=off/on in /proc/mounts for any of the reasons above */
+ if (!ofs->indexdir)
+ ofs->config.index = false;
/* Never override disk quota limits or use reserved space */
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
- err = -ENOMEM;
- oe = ovl_alloc_entry(numlower);
- if (!oe)
- goto out_put_cred;
-
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
sb->s_op = &ovl_super_operations;
sb->s_xattr = ovl_xattr_handlers;
- sb->s_fs_info = ufs;
+ sb->s_fs_info = ofs;
sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
+ err = -ENOMEM;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
if (!root_dentry)
goto out_free_oe;
mntput(upperpath.mnt);
- for (i = 0; i < numlower; i++)
- mntput(stack[i].mnt);
- mntput(workpath.mnt);
- kfree(lowertmp);
-
if (upperpath.dentry) {
oe->has_upper = true;
if (ovl_is_impuredir(upperpath.dentry))
ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
}
- for (i = 0; i < numlower; i++) {
- oe->lowerstack[i].dentry = stack[i].dentry;
- oe->lowerstack[i].mnt = ufs->lower_mnt[i];
- }
- kfree(stack);
root_dentry->d_fsdata = oe;
+ /* Root is always merge -> can have whiteouts */
+ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
ovl_dentry_lower(root_dentry));
@@ -1149,39 +1253,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
return 0;
out_free_oe:
+ ovl_entry_stack_free(oe);
kfree(oe);
-out_put_cred:
- put_cred(ufs->creator_cred);
-out_put_indexdir:
- dput(ufs->indexdir);
-out_put_lower_mnt:
- for (i = 0; i < ufs->numlower; i++)
- mntput(ufs->lower_mnt[i]);
- kfree(ufs->lower_mnt);
-out_put_workdir:
- dput(ufs->workdir);
- mntput(ufs->upper_mnt);
-out_put_lowerpath:
- for (i = 0; i < numlower; i++)
- path_put(&stack[i]);
- kfree(stack);
-out_free_lowertmp:
- kfree(lowertmp);
-out_unlock_workdentry:
- if (ufs->workdir_locked)
- ovl_inuse_unlock(workpath.dentry);
-out_put_workpath:
- path_put(&workpath);
-out_unlock_upperdentry:
- if (ufs->upperdir_locked)
- ovl_inuse_unlock(upperpath.dentry);
-out_put_upperpath:
+out_err:
path_put(&upperpath);
-out_free_config:
- kfree(ufs->config.lowerdir);
- kfree(ufs->config.upperdir);
- kfree(ufs->config.workdir);
- kfree(ufs);
+ ovl_free_fs(ofs);
out:
return err;
}
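
[Editor's note] The refactor above replaces ovl_fill_super()'s cascade of a dozen unwind labels with helpers that record each acquired resource in ovl_fs, plus a single out_err path that hands everything to ovl_free_fs(). A minimal, self-contained sketch of that record-as-you-acquire / NULL-tolerant-teardown shape — ctx, ctx_setup() and ctx_free() are hypothetical stand-ins, with FILE handles in place of mounts and dentries:

    #include <stdio.h>
    #include <stdlib.h>

    /* Each acquire step records its resource in the context; one
     * NULL-safe teardown frees whatever was recorded, however far
     * setup got before failing. */
    struct ctx {
        FILE *upper;
        FILE *work;
    };

    static void ctx_free(struct ctx *c)
    {
        if (!c)
            return;
        if (c->work)
            fclose(c->work);
        if (c->upper)
            fclose(c->upper);
        free(c);
    }

    static struct ctx *ctx_setup(const char *upper, const char *work)
    {
        struct ctx *c = calloc(1, sizeof(*c));

        if (!c)
            return NULL;
        c->upper = fopen(upper, "r");
        if (!c->upper)
            goto out_err;
        c->work = fopen(work, "r");
        if (!c->work)
            goto out_err;
        return c;

    out_err:
        ctx_free(c);    /* single exit path, mirrors ovl_free_fs() */
        return NULL;
    }

    int main(void)
    {
        struct ctx *c = ctx_setup("/etc/hostname", "/etc/hosts");

        ctx_free(c);
        return 0;
    }

The payoff is the same as in the patch: every partial failure funnels through one teardown that only releases what was actually recorded.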
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index b9b239fa5cfd..d6bb1c9f5e7a 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -17,7 +17,6 @@
#include <linux/namei.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
-#include "ovl_entry.h"
int ovl_want_write(struct dentry *dentry)
{
@@ -125,7 +124,12 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
{
struct ovl_entry *oe = dentry->d_fsdata;
- *path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
+ if (oe->numlower) {
+ path->mnt = oe->lowerstack[0].layer->mnt;
+ path->dentry = oe->lowerstack[0].dentry;
+ } else {
+ *path = (struct path) { };
+ }
}
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
@@ -329,6 +333,19 @@ void ovl_copy_up_end(struct dentry *dentry)
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
+bool ovl_check_origin_xattr(struct dentry *dentry)
+{
+ int res;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+
+ /* Zero size value means "copied up but origin unknown" */
+ if (res >= 0)
+ return true;
+
+ return false;
+}
+
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
{
int res;
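
[Editor's note] ovl_check_origin_xattr() above probes for OVL_XATTR_ORIGIN by passing a NULL buffer and zero size to vfs_getxattr(); any non-negative return, including zero, means the attribute exists. A userspace analogue using getxattr(2) — the path and attribute name are arbitrary examples:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    /* getxattr() with size 0 returns the value's length if the
     * attribute exists (zero-length values included) and -1 with
     * errno set otherwise, so it works as a pure existence probe. */
    int main(void)
    {
        ssize_t res = getxattr("/tmp", "user.example", NULL, 0);

        printf("attribute %s\n", res >= 0 ? "present" : "absent");
        return 0;
    }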
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index e0f867cd8553..96f1087e372c 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -1,12 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpufreq.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+__weak void arch_freq_prepare_all(void)
+{
+}
+
extern const struct seq_operations cpuinfo_op;
static int cpuinfo_open(struct inode *inode, struct file *file)
{
+ arch_freq_prepare_all();
return seq_open(file, &cpuinfo_op);
}
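
[Editor's note] The cpuinfo change relies on weak linkage: the kernel's __weak expands to __attribute__((weak)), so the empty arch_freq_prepare_all() above is only a default, and any architecture that links a strong definition of the same symbol wins. A standalone sketch of the idiom — prepare_all() is a made-up name:

    #include <stdio.h>

    /* Weak default: used unless some other translation unit in the
     * link provides a strong definition with the same name (as an
     * architecture would for arch_freq_prepare_all()). */
    __attribute__((weak)) void prepare_all(void)
    {
        puts("generic no-op default");
    }

    int main(void)
    {
        prepare_all();  /* a strong definition, if linked, wins */
        return 0;
    }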
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 086e491faf04..423159abd501 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -651,7 +651,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
return -EINVAL;
record->buf = memdup_user(buf, record->size);
- if (unlikely(IS_ERR(record->buf))) {
+ if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
}
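
[Editor's note] The pstore fix drops unlikely() around IS_ERR() because IS_ERR() already wraps its range check in unlikely(), so the annotation was doubled up. For reference, a standalone sketch of the ERR_PTR convention that makes the check possible — err_ptr/is_err/ptr_err are local stand-ins for the kernel macros:

    #include <errno.h>
    #include <stdio.h>

    /* The top 4095 values of the address space never hold valid
     * kernel pointers, so they can encode -errno in a pointer
     * return.  In the kernel, IS_ERR()'s comparison is itself
     * wrapped in unlikely(). */
    #define MAX_ERRNO 4095

    static inline void *err_ptr(long err) { return (void *)err; }
    static inline long ptr_err(const void *p) { return (long)p; }
    static inline int is_err(const void *p)
    {
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
        void *p = err_ptr(-ENOMEM);

        if (is_err(p))
            printf("error: %ld\n", ptr_err(p));
        return 0;
    }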
diff --git a/fs/read_write.c b/fs/read_write.c
index 0046d72efe94..f8547b82dfb3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -635,27 +635,6 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
return ret;
}
-/*
- * Reduce an iovec's length in-place. Return the resulting number of segments
- */
-unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
-{
- unsigned long seg = 0;
- size_t len = 0;
-
- while (seg < nr_segs) {
- seg++;
- if (len + iov->iov_len >= to) {
- iov->iov_len = to - len;
- break;
- }
- len += iov->iov_len;
- iov++;
- }
- return seg;
-}
-EXPORT_SYMBOL(iov_shorten);
-
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
loff_t *ppos, int type, rwf_t flags)
{
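
[Editor's note] iov_shorten() goes away above; for readers tracking the removal, here is a userspace reproduction of its semantics with a small demonstration of the truncate-the-crossing-segment behavior — shorten() is just the deleted body renamed:

    #include <stdio.h>
    #include <sys/uio.h>

    /* Walk the iovec until `to` bytes are covered, truncate the
     * segment that crosses the boundary, and return how many
     * segments remain in use. */
    static unsigned long shorten(struct iovec *iov, unsigned long n, size_t to)
    {
        unsigned long seg = 0;
        size_t len = 0;

        while (seg < n) {
            seg++;
            if (len + iov->iov_len >= to) {
                iov->iov_len = to - len;
                break;
            }
            len += iov->iov_len;
            iov++;
        }
        return seg;
    }

    int main(void)
    {
        char a[8], b[8];
        struct iovec iov[2] = {
            { .iov_base = a, .iov_len = sizeof(a) },
            { .iov_base = b, .iov_len = sizeof(b) },
        };
        unsigned long used = shorten(iov, 2, 10);

        /* 2 segments used; the second shortened from 8 to 2 bytes */
        printf("segs=%lu last=%zu\n", used, iov[1].iov_len);
        return 0;
    }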
diff --git a/fs/select.c b/fs/select.c
index 063067e606ca..6de493bb42a4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -292,8 +292,7 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
void __user *p,
int timeval, int ret)
{
- struct timespec64 rts64;
- struct timespec rts;
+ struct timespec64 rts;
struct timeval rtv;
if (!p)
@@ -306,23 +305,22 @@ static int poll_select_copy_remaining(struct timespec64 *end_time,
if (!end_time->tv_sec && !end_time->tv_nsec)
return ret;
- ktime_get_ts64(&rts64);
- rts64 = timespec64_sub(*end_time, rts64);
- if (rts64.tv_sec < 0)
- rts64.tv_sec = rts64.tv_nsec = 0;
+ ktime_get_ts64(&rts);
+ rts = timespec64_sub(*end_time, rts);
+ if (rts.tv_sec < 0)
+ rts.tv_sec = rts.tv_nsec = 0;
- rts = timespec64_to_timespec(rts64);
if (timeval) {
if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
memset(&rtv, 0, sizeof(rtv));
- rtv.tv_sec = rts64.tv_sec;
- rtv.tv_usec = rts64.tv_nsec / NSEC_PER_USEC;
+ rtv.tv_sec = rts.tv_sec;
+ rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
if (!copy_to_user(p, &rtv, sizeof(rtv)))
return ret;
- } else if (!copy_to_user(p, &rts, sizeof(rts)))
+ } else if (!put_timespec64(&rts, p))
return ret;
/*
@@ -705,17 +703,15 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
const sigset_t __user *sigmask, size_t sigsetsize)
{
sigset_t ksigmask, sigsaved;
- struct timespec ts;
- struct timespec64 ts64, end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (get_timespec64(&ts, tsp))
return -EFAULT;
- ts64 = timespec_to_timespec64(ts);
to = &end_time;
- if (poll_select_set_timeout(to, ts64.tv_sec, ts64.tv_nsec))
+ if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
return -EINVAL;
}
@@ -1052,12 +1048,11 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
size_t, sigsetsize)
{
sigset_t ksigmask, sigsaved;
- struct timespec ts;
- struct timespec64 end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (get_timespec64(&ts, tsp))
return -EFAULT;
to = &end_time;
@@ -1103,10 +1098,10 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
static
-int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
int timeval, int ret)
{
- struct timespec ts;
+ struct timespec64 ts;
if (!p)
return ret;
@@ -1118,8 +1113,8 @@ int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
if (!end_time->tv_sec && !end_time->tv_nsec)
return ret;
- ktime_get_ts(&ts);
- ts = timespec_sub(*end_time, ts);
+ ktime_get_ts64(&ts);
+ ts = timespec64_sub(*end_time, ts);
if (ts.tv_sec < 0)
ts.tv_sec = ts.tv_nsec = 0;
@@ -1132,12 +1127,7 @@ int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
if (!copy_to_user(p, &rtv, sizeof(rtv)))
return ret;
} else {
- struct compat_timespec rts;
-
- rts.tv_sec = ts.tv_sec;
- rts.tv_nsec = ts.tv_nsec;
-
- if (!copy_to_user(p, &rts, sizeof(rts)))
+ if (!compat_put_timespec64(&ts, p))
return ret;
}
/*
@@ -1195,7 +1185,7 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
*/
static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- struct timespec *end_time)
+ struct timespec64 *end_time)
{
fd_set_bits fds;
void *bits;
@@ -1268,7 +1258,7 @@ COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
struct compat_timeval __user *, tvp)
{
- struct timespec end_time, *to = NULL;
+ struct timespec64 end_time, *to = NULL;
struct compat_timeval tv;
int ret;
@@ -1312,14 +1302,12 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
compat_size_t sigsetsize)
{
- compat_sigset_t ss32;
sigset_t ksigmask, sigsaved;
- struct compat_timespec ts;
- struct timespec end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (compat_get_timespec64(&ts, tsp))
return -EFAULT;
to = &end_time;
@@ -1330,9 +1318,8 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ if (get_compat_sigset(&ksigmask, sigmask))
return -EFAULT;
- sigset_from_compat(&ksigmask, &ss32);
sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
@@ -1381,14 +1368,12 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
unsigned int, nfds, struct compat_timespec __user *, tsp,
const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
{
- compat_sigset_t ss32;
sigset_t ksigmask, sigsaved;
- struct compat_timespec ts;
- struct timespec end_time, *to = NULL;
+ struct timespec64 ts, end_time, *to = NULL;
int ret;
if (tsp) {
- if (copy_from_user(&ts, tsp, sizeof(ts)))
+ if (compat_get_timespec64(&ts, tsp))
return -EFAULT;
to = &end_time;
@@ -1399,9 +1384,8 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ if (get_compat_sigset(&ksigmask, sigmask))
return -EFAULT;
- sigset_from_compat(&ksigmask, &ss32);
sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
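
[Editor's note] The select.c conversion routes every timeout through struct timespec64 and the get_timespec64()/put_timespec64() accessors, so nothing truncates to a 32-bit time_t along the way. A userspace sketch of the same deadline arithmetic kept in one 64-bit type — ts_sub() is a helper written here, and on 64-bit glibc struct timespec already has a 64-bit tv_sec:

    #include <stdio.h>
    #include <time.h>

    /* Subtract b from a, normalizing a negative nanosecond field. */
    static struct timespec ts_sub(struct timespec a, struct timespec b)
    {
        struct timespec r = {
            .tv_sec  = a.tv_sec - b.tv_sec,
            .tv_nsec = a.tv_nsec - b.tv_nsec,
        };

        if (r.tv_nsec < 0) {
            r.tv_sec--;
            r.tv_nsec += 1000000000L;
        }
        return r;
    }

    int main(void)
    {
        struct timespec now, end, left;

        clock_gettime(CLOCK_MONOTONIC, &now);
        end = now;
        end.tv_sec += 5;                /* deadline 5s out */

        clock_gettime(CLOCK_MONOTONIC, &now);
        left = ts_sub(end, now);
        if (left.tv_sec < 0)            /* clamp, like the patch */
            left.tv_sec = left.tv_nsec = 0;

        printf("%lld.%09lds left\n", (long long)left.tv_sec, left.tv_nsec);
        return 0;
    }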
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 1c667af86da5..5f1ff8756595 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -313,15 +313,13 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
compat_size_t, sigsetsize,
int, flags)
{
- compat_sigset_t ss32;
sigset_t tmp;
sigset_t __user *ksigmask;
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+ if (get_compat_sigset(&tmp, sigmask))
return -EFAULT;
- sigset_from_compat(&tmp, &ss32);
ksigmask = compat_alloc_user_space(sizeof(sigset_t));
if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
return -EFAULT;
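
[Editor's note] get_compat_sigset() replaces the open-coded copy_from_user() + sigset_from_compat() pair here and in select.c. On a 64-bit kernel the interesting work is fusing pairs of 32-bit compat words into native unsigned longs, low word first; a standalone sketch, where the word counts are illustrative rather than the kernel's _NSIG arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    #define COMPAT_WORDS 2
    #define NATIVE_WORDS (COMPAT_WORDS / 2)

    /* Fuse each pair of 32-bit compat words into one 64-bit native
     * word, low half first, mirroring the helper's 64-bit case. */
    static void sigset_from_compat32(uint64_t *dst, const uint32_t *src)
    {
        for (int i = 0; i < NATIVE_WORDS; i++)
            dst[i] = (uint64_t)src[2 * i] |
                     ((uint64_t)src[2 * i + 1] << 32);
    }

    int main(void)
    {
        uint32_t compat[COMPAT_WORDS] = { 0x00000100, 0x00000001 };
        uint64_t native[NATIVE_WORDS];

        sigset_from_compat32(native, compat);
        printf("native[0] = %#llx\n", (unsigned long long)native[0]);
        return 0;
    }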
diff --git a/fs/statfs.c b/fs/statfs.c
index c25dd9a26cc1..b072a8bab71a 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -217,7 +217,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
return error;
}
-int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
+static int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
{
struct super_block *s = user_get_super(dev);
int err;
diff --git a/fs/super.c b/fs/super.c
index 994db21f59bf..d4e33e8f1e6f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -155,21 +155,19 @@ static void destroy_super_rcu(struct rcu_head *head)
schedule_work(&s->destroy_work);
}
-/**
- * destroy_super - frees a superblock
- * @s: superblock to free
- *
- * Frees a superblock.
- */
-static void destroy_super(struct super_block *s)
+/* Free a superblock that has never been seen by anyone */
+static void destroy_unused_super(struct super_block *s)
{
+ if (!s)
+ return;
+ up_write(&s->s_umount);
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
- WARN_ON(!list_empty(&s->s_mounts));
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
- call_rcu(&s->rcu, destroy_super_rcu);
+ /* no delays needed */
+ destroy_super_work(&s->destroy_work);
}
/**
@@ -257,7 +255,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
return s;
fail:
- destroy_super(s);
+ destroy_unused_super(s);
return NULL;
}
@@ -266,11 +264,17 @@ fail:
/*
* Drop a superblock's refcount. The caller must hold sb_lock.
*/
-static void __put_super(struct super_block *sb)
+static void __put_super(struct super_block *s)
{
- if (!--sb->s_count) {
- list_del_init(&sb->s_list);
- destroy_super(sb);
+ if (!--s->s_count) {
+ list_del_init(&s->s_list);
+ WARN_ON(s->s_dentry_lru.node);
+ WARN_ON(s->s_inode_lru.node);
+ WARN_ON(!list_empty(&s->s_mounts));
+ security_sb_free(s);
+ put_user_ns(s->s_user_ns);
+ kfree(s->s_subtype);
+ call_rcu(&s->rcu, destroy_super_rcu);
}
}
@@ -485,19 +489,12 @@ retry:
continue;
if (user_ns != old->s_user_ns) {
spin_unlock(&sb_lock);
- if (s) {
- up_write(&s->s_umount);
- destroy_super(s);
- }
+ destroy_unused_super(s);
return ERR_PTR(-EBUSY);
}
if (!grab_super(old))
goto retry;
- if (s) {
- up_write(&s->s_umount);
- destroy_super(s);
- s = NULL;
- }
+ destroy_unused_super(s);
return old;
}
}
@@ -512,8 +509,7 @@ retry:
err = set(s, data);
if (err) {
spin_unlock(&sb_lock);
- up_write(&s->s_umount);
- destroy_super(s);
+ destroy_unused_super(s);
return ERR_PTR(err);
}
s->s_type = type;
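
[Editor's note] The super.c split encodes an RCU lifetime rule: a superblock that never made it onto the shared list has no readers, so destroy_unused_super() may free it synchronously ("no delays needed"), while __put_super() keeps the RCU-deferred path for superblocks that were published. A kernel-style sketch of the distinction, using a hypothetical obj type rather than the VFS code itself:

    #include <linux/rculist.h>
    #include <linux/slab.h>

    struct obj {
        struct list_head list;
        struct rcu_head rcu;
    };

    static void obj_free_rcu(struct rcu_head *head)
    {
        kfree(container_of(head, struct obj, rcu));
    }

    /* Never visible to anyone: immediate free is safe. */
    static void destroy_unused_obj(struct obj *o)
    {
        if (!o)         /* NULL-tolerant, like the new helper */
            return;
        kfree(o);
    }

    /* Was on a shared, RCU-traversed list: defer the free until
     * any concurrent reader is done with its read-side section. */
    static void put_published_obj(struct obj *o)
    {
        list_del_rcu(&o->list);
        call_rcu(&o->rcu, obj_free_rcu);
    }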
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 343a94246f5b..19e546a41251 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -302,7 +302,7 @@ xfs_iext_rec_cmp(
xfs_fileoff_t offset)
{
uint64_t rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
- u32 rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
+ uint32_t rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
if (rec_offset > offset)
return 1;
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 996f035ee205..349d9f8edb89 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -274,7 +274,7 @@ struct xfs_inode_log_format {
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
- u8 __pad[16]; /* unused */
+ uint8_t __pad[16]; /* unused */
} ilf_u;
int64_t ilf_blkno; /* blkno of inode buffer */
int32_t ilf_len; /* len of inode buffer */
@@ -295,7 +295,7 @@ struct xfs_inode_log_format_32 {
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
- u8 __pad[16]; /* unused */
+ uint8_t __pad[16]; /* unused */
} ilf_u;
int64_t ilf_blkno; /* blkno of inode buffer */
int32_t ilf_len; /* len of inode buffer */
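
[Editor's note] The u8 -> uint8_t changes touch structures that describe a persistent log format; presumably the motivation is that these libxfs headers are shared with userspace, where the kernel-private u8/u32 typedefs do not exist, and fixed-width standard types pin the ABI. A sketch of the usual companion discipline, a compile-time layout check — demo_log_rec is illustrative only:

    #include <assert.h>
    #include <stdint.h>

    /* The layout is ABI: pin it with standard fixed-width types
     * and fail the build if the size ever drifts. */
    struct demo_log_rec {
        uint16_t magic;
        uint16_t size;
        uint64_t ino;
        uint8_t  pad[16];
    } __attribute__((packed));

    static_assert(sizeof(struct demo_log_rec) == 28,
                  "demo_log_rec layout is part of the log format");

    int main(void) { return 0; }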
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d8226f7a5dde..61d1cb7dc10d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2357,6 +2357,7 @@ retry:
*/
if (ip->i_ino != inum + i) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ rcu_read_unlock();
continue;
}
}
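
[Editor's note] The xfs_inode.c one-liner fixes a classic imbalance: an early `continue` out of an rcu_read_lock()ed section skipped the matching unlock, leaving the CPU in a read-side critical section on the next iteration. A kernel-style sketch of the shape — lookup() and the item type are hypothetical:

    #include <linux/rcupdate.h>

    struct item { int id; };

    /* Hypothetical RCU-protected lookup, defined elsewhere. */
    static struct item *lookup(unsigned long id);

    static void scan(unsigned long first, int count)
    {
        int i;

        for (i = 0; i < count; i++) {
            struct item *it;

            rcu_read_lock();
            it = lookup(first + i);
            if (!it || it->id != (int)(first + i)) {
                /* the fix: drop the lock on the skip path too */
                rcu_read_unlock();
                continue;
            }
            /* ... take a proper reference, then ... */
            rcu_read_unlock();
        }
    }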