From 3e004eea56b4f2cb6768ebe9000c830eda0c71b1 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 17 Nov 2015 14:40:09 +0800 Subject: fs/block_dev.c: make sb_is_blkdev_sb return bool when CONFIG_BLOCK undefined Currently when CONFIG_BLOCK is defined sb_is_blkdev_sb returns bool, while when CONFIG_BLOCK is not defined it returns int. Let's keep consistent to make sb_is_blkdev_sb return bool as well when CONFIG_BLOCK isn't defined. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3aa514254161..11505af0d358 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2291,9 +2291,9 @@ static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void { } -static inline int sb_is_blkdev_sb(struct super_block *sb) +static inline bool sb_is_blkdev_sb(struct super_block *sb) { - return 0; + return false; } #endif extern int sync_filesystem(struct super_block *); -- cgit v1.2.3 From 25ab4c9b1ccb64b1433cecd3f19f28fe300c1576 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 17 Nov 2015 14:40:10 +0800 Subject: fs/namespace.c: path_is_under can be boolean This patch makes path_is_under return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Al Viro --- fs/namespace.c | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index 0570729c87fd..b27156f2e68b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2939,9 +2939,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); } -int path_is_under(struct path *path1, struct path *path2) +bool path_is_under(struct path *path1, struct path *path2) { - int res; + bool res; read_seqlock_excl(&mount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); read_sequnlock_excl(&mount_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 11505af0d358..aab8094656e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2533,7 +2533,7 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); -extern int path_is_under(struct path *, struct path *); +extern bool path_is_under(struct path *, struct path *); extern char *file_path(struct file *, char *, int); -- cgit v1.2.3 From a6e5787fc8fc9c88290a7bceed07aa4d14029fa7 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 17 Nov 2015 14:40:11 +0800 Subject: fs/dcache.c: is_subdir can be boolean This patch makes is_subdir return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Al Viro --- fs/dcache.c | 14 +++++++------- include/linux/fs.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 5c33aeb0f68f..670f7896945b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3303,18 +3303,18 @@ out: * @new_dentry: new dentry * @old_dentry: old dentry * - * Returns 1 if new_dentry is a subdirectory of the parent (at any depth). - * Returns 0 otherwise. + * Returns true if new_dentry is a subdirectory of the parent (at any depth). + * Returns false otherwise. * Caller must ensure that "new_dentry" is pinned before calling is_subdir() */ -int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) +bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { - int result; + bool result; unsigned seq; if (new_dentry == old_dentry) - return 1; + return true; do { /* for restarting inner loop in case of seq retry */ @@ -3325,9 +3325,9 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) */ rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) - result = 1; + result = true; else - result = 0; + result = false; rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); diff --git a/include/linux/fs.h b/include/linux/fs.h index aab8094656e4..4b23def18aa9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2532,7 +2532,7 @@ extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ -extern int is_subdir(struct dentry *, struct dentry *); +extern bool is_subdir(struct dentry *, struct dentry *); extern bool path_is_under(struct path *, struct path *); extern char *file_path(struct file *, char *, int); -- cgit v1.2.3 From 0e3ef1fe453c9f29c30d040cd2559c5cac724e93 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 19 Nov 2015 21:00:11 +0800 Subject: fs/bad_inode.c: is_bad_inode can be boolean This patch makes is_bad_inode return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Al Viro --- fs/bad_inode.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 861b1e1c4777..103f5d7c3083 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -192,7 +192,7 @@ EXPORT_SYMBOL(make_bad_inode); * Returns true if the inode in question has been marked as bad. */ -int is_bad_inode(struct inode *inode) +bool is_bad_inode(struct inode *inode) { return (inode->i_op == &bad_inode_ops); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b23def18aa9..6b33ac447612 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2371,7 +2371,7 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); -extern int is_bad_inode(struct inode *); +extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK /* -- cgit v1.2.3 From d37177bacdf7cbcdb23a513cbb475fa241f8083c Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 19 Nov 2015 21:00:12 +0800 Subject: fs/attr.c: is_sxid can be boolean This patch makes is_sxid return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6b33ac447612..bd1447661e3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2963,7 +2963,7 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) -static inline int is_sxid(umode_t mode) +static inline bool is_sxid(umode_t mode) { return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } -- cgit v1.2.3 From b25472f9b96159cc0b9b7ed449448805973cd789 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Dec 2015 22:04:48 -0500 Subject: new helpers: no_seek_end_llseek{,_size}() Signed-off-by: Al Viro --- arch/sparc/kernel/mdesc.c | 20 ++-------------- arch/x86/kernel/cpuid.c | 24 +------------------- arch/x86/kernel/msr.c | 24 +------------------- drivers/char/nwflash.c | 31 +------------------------ drivers/net/wireless/ti/wlcore/debugfs.c | 17 +------------- drivers/s390/char/vmur.c | 15 +----------- drivers/s390/char/zcore.c | 13 +---------- drivers/usb/core/devices.c | 26 +-------------------- drivers/usb/core/devio.c | 26 +-------------------- drivers/usb/host/uhci-debug.c | 23 ++----------------- drivers/usb/misc/sisusbvga/sisusb.c | 16 +------------ fs/read_write.c | 39 ++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 13 files changed, 54 insertions(+), 222 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 6f80936e0eea..11228861d9b4 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -1033,25 +1033,9 @@ static ssize_t mdesc_read(struct file *file, char __user *buf, static loff_t mdesc_llseek(struct file *file, loff_t offset, int whence) { - struct mdesc_handle *hp; - - switch (whence) { - case SEEK_CUR: - offset += file->f_pos; - break; - case SEEK_SET: - break; - default: - return -EINVAL; - } - - hp = file->private_data; - if (offset > hp->handle_size) - return -EINVAL; - else - file->f_pos = offset; + struct mdesc_handle *hp = file->private_data; - return offset; + return no_seek_end_llseek_size(file, offset, whence, hp->handle_size); } /* mdesc_close() - /dev/mdesc is being closed, release the reference to diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index bd3507da39f0..2836de390f95 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -58,28 +58,6 @@ static void cpuid_smp_cpuid(void *cmd_block) &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); } -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file->f_mapping->host; - - mutex_lock(&inode->i_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - static ssize_t cpuid_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -132,7 +110,7 @@ static int cpuid_open(struct inode *inode, struct file *file) */ static const struct file_operations cpuid_fops = { .owner = THIS_MODULE, - .llseek = cpuid_seek, + .llseek = no_seek_end_llseek, .read = cpuid_read, .open = cpuid_open, }; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 113e70784854..64f9616f93f1 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -45,28 +45,6 @@ static struct class *msr_class; -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file_inode(file); - - mutex_lock(&inode->i_mutex); - switch (orig) { - case SEEK_SET: - file->f_pos = offset; - ret = file->f_pos; - break; - case SEEK_CUR: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - static ssize_t msr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -194,7 +172,7 @@ static int msr_open(struct inode *inode, struct file *file) */ static const struct file_operations msr_fops = { .owner = THIS_MODULE, - .llseek = msr_seek, + .llseek = no_seek_end_llseek, .read = msr_read, .write = msr_write, .open = msr_open, diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index e371480d3639..dbe598de9b74 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -277,36 +277,7 @@ static loff_t flash_llseek(struct file *file, loff_t offset, int orig) printk(KERN_DEBUG "flash_llseek: offset=0x%X, orig=0x%X.\n", (unsigned int) offset, orig); - switch (orig) { - case 0: - if (offset < 0) { - ret = -EINVAL; - break; - } - - if ((unsigned int) offset > gbFlashSize) { - ret = -EINVAL; - break; - } - - file->f_pos = (unsigned int) offset; - ret = file->f_pos; - break; - case 1: - if ((file->f_pos + offset) > gbFlashSize) { - ret = -EINVAL; - break; - } - if ((file->f_pos + offset) < 0) { - ret = -EINVAL; - break; - } - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } + ret = no_seek_end_llseek_size(file, offset, orig, gbFlashSize); mutex_unlock(&flash_mutex); return ret; } diff --git a/drivers/net/wireless/ti/wlcore/debugfs.c b/drivers/net/wireless/ti/wlcore/debugfs.c index eb43f94a1597..be72306f8c69 100644 --- a/drivers/net/wireless/ti/wlcore/debugfs.c +++ b/drivers/net/wireless/ti/wlcore/debugfs.c @@ -1205,26 +1205,11 @@ err_out: static loff_t dev_mem_seek(struct file *file, loff_t offset, int orig) { - loff_t ret; - /* only requests of dword-aligned size and offset are supported */ if (offset % 4) return -EINVAL; - switch (orig) { - case SEEK_SET: - file->f_pos = offset; - ret = file->f_pos; - break; - case SEEK_CUR: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - - return ret; + return no_seek_end_llseek(file, offset, orig); } static const struct file_operations dev_mem_ops = { diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index 0efb27f6f199..6c30e93ab8fa 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -782,24 +782,11 @@ static int ur_release(struct inode *inode, struct file *file) static loff_t ur_llseek(struct file *file, loff_t offset, int whence) { - loff_t newpos; - if ((file->f_flags & O_ACCMODE) != O_RDONLY) return -ESPIPE; /* seek allowed only for reader */ if (offset % PAGE_SIZE) return -ESPIPE; /* only multiples of 4K allowed */ - switch (whence) { - case 0: /* SEEK_SET */ - newpos = offset; - break; - case 1: /* SEEK_CUR */ - newpos = file->f_pos + offset; - break; - default: - return -EINVAL; - } - file->f_pos = newpos; - return newpos; + return no_seek_end_llseek(file, offset, whence); } static const struct file_operations ur_fops = { diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 823f41fc4bbd..3339b862ec17 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -385,18 +385,7 @@ static loff_t zcore_lseek(struct file *file, loff_t offset, int orig) loff_t rc; mutex_lock(&zcore_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - rc = file->f_pos; - break; - case 1: - file->f_pos += offset; - rc = file->f_pos; - break; - default: - rc = -EINVAL; - } + rc = no_seek_end_llseek(file, offset, orig); mutex_unlock(&zcore_mutex); return rc; } diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index 2a3bbdf7eb94..cffa0a0d7de2 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -661,32 +661,8 @@ static unsigned int usb_device_poll(struct file *file, return 0; } -static loff_t usb_device_lseek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - - mutex_lock(&file_inode(file)->i_mutex); - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - case 2: - default: - ret = -EINVAL; - } - - mutex_unlock(&file_inode(file)->i_mutex); - return ret; -} - const struct file_operations usbfs_devices_fops = { - .llseek = usb_device_lseek, + .llseek = no_seek_end_llseek, .read = usb_device_read, .poll = usb_device_poll, }; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 38ae877c46e3..dbc3e143453a 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -157,30 +157,6 @@ static int connected(struct usb_dev_state *ps) ps->dev->state != USB_STATE_NOTATTACHED); } -static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - - mutex_lock(&file_inode(file)->i_mutex); - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - case 2: - default: - ret = -EINVAL; - } - - mutex_unlock(&file_inode(file)->i_mutex); - return ret; -} - static ssize_t usbdev_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -2366,7 +2342,7 @@ static unsigned int usbdev_poll(struct file *file, const struct file_operations usbdev_file_operations = { .owner = THIS_MODULE, - .llseek = usbdev_lseek, + .llseek = no_seek_end_llseek, .read = usbdev_read, .poll = usbdev_poll, .unlocked_ioctl = usbdev_ioctl, diff --git a/drivers/usb/host/uhci-debug.c b/drivers/usb/host/uhci-debug.c index 1b28a000d5c6..9c6635d43db0 100644 --- a/drivers/usb/host/uhci-debug.c +++ b/drivers/usb/host/uhci-debug.c @@ -584,27 +584,8 @@ static int uhci_debug_open(struct inode *inode, struct file *file) static loff_t uhci_debug_lseek(struct file *file, loff_t off, int whence) { - struct uhci_debug *up; - loff_t new = -1; - - up = file->private_data; - - /* - * XXX: atomic 64bit seek access, but that needs to be fixed in the VFS - */ - switch (whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - } - - if (new < 0 || new > up->size) - return -EINVAL; - - return (file->f_pos = new); + struct uhci_debug *up = file->private_data; + return no_seek_end_llseek_size(file, off, whence, up->size); } static ssize_t uhci_debug_read(struct file *file, char __user *buf, diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index 306d6852ebc7..8efbabacc84e 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -2825,21 +2825,7 @@ sisusb_lseek(struct file *file, loff_t offset, int orig) return -ENODEV; } - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - /* never negative, no force_successful_syscall needed */ - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - /* never negative, no force_successful_syscall needed */ - break; - default: - /* seeking relative to "end of file" is not supported */ - ret = -EINVAL; - } + ret = no_seek_end_llseek(file, offset, orig); mutex_unlock(&sisusb->lock); return ret; diff --git a/fs/read_write.c b/fs/read_write.c index 819ef3faf1bb..acb171331278 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -170,6 +170,45 @@ loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t si } EXPORT_SYMBOL(fixed_size_llseek); +/** + * no_seek_end_llseek - llseek implementation for fixed-sized devices + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * + */ +loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) +{ + switch (whence) { + case SEEK_SET: case SEEK_CUR: + return generic_file_llseek_size(file, offset, whence, + ~0ULL, 0); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL(no_seek_end_llseek); + +/** + * no_seek_end_llseek_size - llseek implementation for fixed-sized devices + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * @size: maximal offset allowed + * + */ +loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) +{ + switch (whence) { + case SEEK_SET: case SEEK_CUR: + return generic_file_llseek_size(file, offset, whence, + size, 0); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL(no_seek_end_llseek_size); + /** * noop_llseek - No Operation Performed llseek implementation * @file: file structure to seek on diff --git a/include/linux/fs.h b/include/linux/fs.h index bd1447661e3c..fb0fa224d8e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2660,6 +2660,8 @@ extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); +extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); +extern loff_t no_seek_end_llseek(struct file *, loff_t, int); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.3 From e9d408e107db9a554b36c3a79f67b37dd3e16da0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2015 00:06:05 -0500 Subject: new helper: memdup_user_nul() Similar to memdup_user(), except that allocated buffer is one byte longer and '\0' is stored after the copied data. Signed-off-by: Al Viro --- include/linux/string.h | 1 + mm/util.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 9ef7795e65e4..9eebc66d957a 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -10,6 +10,7 @@ extern char *strndup_user(const char __user *, long); extern void *memdup_user(const void __user *, size_t); +extern void *memdup_user_nul(const void __user *, size_t); /* * Include machine specific inline routines diff --git a/mm/util.c b/mm/util.c index 9af1c12b310c..2d28f7930043 100644 --- a/mm/util.c +++ b/mm/util.c @@ -176,6 +176,37 @@ char *strndup_user(const char __user *s, long n) } EXPORT_SYMBOL(strndup_user); +/** + * memdup_user_nul - duplicate memory region from user space and NUL-terminate + * + * @src: source address in user space + * @len: number of bytes to copy + * + * Returns an ERR_PTR() on failure. + */ +void *memdup_user_nul(const void __user *src, size_t len) +{ + char *p; + + /* + * Always use GFP_KERNEL, since copy_from_user() can sleep and + * cause pagefault, which makes it pointless to use GFP_NOFS + * or GFP_ATOMIC. + */ + p = kmalloc_track_caller(len + 1, GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(p, src, len)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + p[len] = '\0'; + + return p; +} +EXPORT_SYMBOL(memdup_user_nul); + void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { -- cgit v1.2.3 From bbddca8e8fac07ece3938e03526b5d00fa791a4c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jan 2016 16:08:20 -0500 Subject: nfsd: don't hold i_mutex over userspace upcalls We need information about exports when crossing mountpoints during lookup or NFSv4 readdir. If we don't already have that information cached, we may have to ask (and wait for) rpc.mountd. In both cases we currently hold the i_mutex on the parent of the directory we're asking rpc.mountd about. We've seen situations where rpc.mountd performs some operation on that directory that tries to take the i_mutex again, resulting in deadlock. With some care, we may be able to avoid that in rpc.mountd. But it seems better just to avoid holding a mutex while waiting on userspace. It appears that lookup_one_len is pretty much the only operation that needs the i_mutex. So we could just drop the i_mutex elsewhere and do something like mutex_lock() lookup_one_len() mutex_unlock() In many cases though the lookup would have been cached and not required the i_mutex, so it's more efficient to create a lookup_one_len() variant that only takes the i_mutex when necessary. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/namei.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfs3xdr.c | 2 +- fs/nfsd/nfs4xdr.c | 8 +++--- fs/nfsd/vfs.c | 23 +++++++---------- include/linux/namei.h | 1 + 5 files changed, 86 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 45c702edce3c..1067f7a0287a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2272,6 +2272,8 @@ EXPORT_SYMBOL(vfs_path_lookup); * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. + * + * The caller must hold base->i_mutex. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { @@ -2315,6 +2317,75 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) } EXPORT_SYMBOL(lookup_one_len); +/** + * lookup_one_len_unlocked - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * Unlike lookup_one_len, it should be called without the parent + * i_mutex held, and will take the i_mutex itself if necessary. + */ +struct dentry *lookup_one_len_unlocked(const char *name, + struct dentry *base, int len) +{ + struct qstr this; + unsigned int c; + int err; + struct dentry *ret; + + this.name = name; + this.len = len; + this.hash = full_name_hash(name, len); + if (!len) + return ERR_PTR(-EACCES); + + if (unlikely(name[0] == '.')) { + if (len < 2 || (len == 2 && name[1] == '.')) + return ERR_PTR(-EACCES); + } + + while (len--) { + c = *(const unsigned char *)name++; + if (c == '/' || c == '\0') + return ERR_PTR(-EACCES); + } + /* + * See if the low-level filesystem might want + * to use its own hash.. + */ + if (base->d_flags & DCACHE_OP_HASH) { + int err = base->d_op->d_hash(base, &this); + if (err < 0) + return ERR_PTR(err); + } + + err = inode_permission(base->d_inode, MAY_EXEC); + if (err) + return ERR_PTR(err); + + /* + * __d_lookup() is used to try to get a quick answer and avoid the + * mutex. A false-negative does no harm. + */ + ret = __d_lookup(base, &this); + if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) { + dput(ret); + ret = NULL; + } + if (ret) + return ret; + + mutex_lock(&base->d_inode->i_mutex); + ret = __lookup_hash(&this, base, 0); + mutex_unlock(&base->d_inode->i_mutex); + return ret; +} +EXPORT_SYMBOL(lookup_one_len_unlocked); + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 00575d776d91..2246454dec76 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, } else dchild = dget(dparent); } else - dchild = lookup_one_len(name, dparent, namlen); + dchild = lookup_one_len_unlocked(name, dparent, namlen); if (IS_ERR(dchild)) return rv; if (d_mountpoint(dchild)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 51c9e9ca39a4..325521ce389a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2838,14 +2838,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, __be32 nfserr; int ignore_crossmnt = 0; - dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); + dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); if (d_really_is_negative(dentry)) { /* - * nfsd_buffered_readdir drops the i_mutex between - * readdir and calling this callback, leaving a window - * where this directory entry could have gone away. + * we're not holding the i_mutex here, so there's + * a window where this directory entry could have gone + * away. */ dput(dentry); return nfserr_noent; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 994d66fbb446..4212aaacbb55 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -217,10 +217,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; - /* - * check if we have crossed a mount point ... - */ if (nfsd_mountpoint(dentry, exp)) { + /* + * We don't need the i_mutex after all. It's + * still possible we could open this (regular + * files can be mountpoints too), but the + * i_mutex is just there to prevent renames of + * something that we might be about to delegate, + * and a mountpoint won't be renamed: + */ + fh_unlock(fhp); if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { dput(dentry); goto out_nfserr; @@ -1809,7 +1815,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, offset = *offsetp; while (1) { - struct inode *dir_inode = file_inode(file); unsigned int reclen; cdp->err = nfserr_eof; /* will be cleared on successful read */ @@ -1828,15 +1833,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, if (!size) break; - /* - * Various filldir functions may end up calling back into - * lookup_one_len() and the file system's ->lookup() method. - * These expect i_mutex to be held, as it would within readdir. - */ - host_err = mutex_lock_killable(&dir_inode->i_mutex); - if (host_err) - break; - de = (struct buffered_dirent *)buf.dirent; while (size > 0) { offset = de->offset; @@ -1853,7 +1849,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, size -= reclen; de = (struct buffered_dirent *)((char *)de + reclen); } - mutex_unlock(&dir_inode->i_mutex); if (size > 0) /* We bailed out early */ break; diff --git a/include/linux/namei.h b/include/linux/namei.h index d8c6334cd150..d0f25d81b46a 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -77,6 +77,7 @@ extern struct dentry *kern_path_locked(const char *, struct path *); extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); +extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *); extern int follow_down(struct path *); -- cgit v1.2.3