From 0040e606e35a0db80fc3fac04ccc7c7176a8e2b1 Mon Sep 17 00:00:00 2001 From: Christoph Jaeger Date: Sat, 12 Apr 2014 13:33:13 +0200 Subject: btrfs: fix use-after-free in mount_subvol() Pointer 'newargs' is used after the memory that it points to has already been freed. Picked up by Coverity - CID 1201425. Fixes: 0723a0473f ("btrfs: allow mounting btrfs subvolumes with different ro/rw options") Signed-off-by: Christoph Jaeger Signed-off-by: Chris Mason --- fs/btrfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 994c40955315..53bc3733d483 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1186,7 +1186,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, return ERR_PTR(-ENOMEM); mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); - kfree(newargs); if (PTR_RET(mnt) == -EBUSY) { if (flags & MS_RDONLY) { @@ -1196,17 +1195,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, int r; mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, newargs); - if (IS_ERR(mnt)) + if (IS_ERR(mnt)) { + kfree(newargs); return ERR_CAST(mnt); + } r = btrfs_remount(mnt->mnt_sb, &flags, NULL); if (r < 0) { /* FIXME: release vfsmount mnt ??*/ + kfree(newargs); return ERR_PTR(r); } } } + kfree(newargs); + if (IS_ERR(mnt)) return ERR_CAST(mnt); -- cgit v1.2.3 From f1c6bb2cb8b81013e8979806f8e15e3d53efb96d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Apr 2014 06:17:49 -0400 Subject: locks: allow __break_lease to sleep even when break_time is 0 A fl->fl_break_time of 0 has a special meaning to the lease break code that basically means "never break the lease". knfsd uses this to ensure that leases don't disappear out from under it. Unfortunately, the code in __break_lease can end up passing this value to wait_event_interruptible as a timeout, which prevents it from going to sleep at all. This makes __break_lease to spin in a tight loop and causes soft lockups. Fix this by ensuring that we pass a minimum value of 1 as a timeout instead. Cc: Cc: J. Bruce Fields Reported-by: Terry Barnaby Signed-off-by: Jeff Layton --- fs/locks.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 13fc7a6d380a..b380f5543614 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1391,11 +1391,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) restart: break_time = flock->fl_break_time; - if (break_time != 0) { + if (break_time != 0) break_time -= jiffies; - if (break_time == 0) - break_time++; - } + if (break_time == 0) + break_time++; locks_insert_block(flock, new_fl); spin_unlock(&inode->i_lock); error = wait_event_interruptible_timeout(new_fl->fl_wait, -- cgit v1.2.3 From 4991a628a789dc5954e98e79476d9808812292ec Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Apr 2014 08:44:12 -0400 Subject: locks: allow __break_lease to sleep even when break_time is 0 A fl->fl_break_time of 0 has a special meaning to the lease break code that basically means "never break the lease". knfsd uses this to ensure that leases don't disappear out from under it. Unfortunately, the code in __break_lease can end up passing this value to wait_event_interruptible as a timeout, which prevents it from going to sleep at all. This causes __break_lease to spin in a tight loop and causes soft lockups. Fix this by ensuring that we pass a minimum value of 1 as a timeout instead. Cc: Cc: J. 
Bruce Fields Reported-by: Terry Barnaby Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/locks.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 13fc7a6d380a..b380f5543614 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1391,11 +1391,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) restart: break_time = flock->fl_break_time; - if (break_time != 0) { + if (break_time != 0) break_time -= jiffies; - if (break_time == 0) - break_time++; - } + if (break_time == 0) + break_time++; locks_insert_block(flock, new_fl); spin_unlock(&inode->i_lock); error = wait_event_interruptible_timeout(new_fl->fl_wait, -- cgit v1.2.3 From 3758cf7e14b753838fe754ede3862af10b35fdac Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Apr 2014 08:51:48 -0400 Subject: nfsd: set timeparms.to_maxval in setup_callback_client ...otherwise the logic in the timeout handling doesn't work correctly. Spotted-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 39c8ef875f91..2c73cae9899d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -654,9 +654,11 @@ static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(clp->net), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { .net = clp->net, -- cgit v1.2.3 From fc208d026be0c7d60db9118583fc62f6ca97743d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 9 Apr 2014 11:07:01 -0400 Subject: Revert "nfsd4: fix nfs4err_resource in 4.1 case" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we're still limiting attributes to a page, the result here is that a large getattr result will return NFS4ERR_REP_TOO_BIG/TOO_BIG_TO_CACHE instead of NFS4ERR_RESOURCE. Both error returns are wrong, and the real bug here is the arbitrary limit on getattr results, fixed by as-yet out-of-tree patches. But at a minimum we can make life easier for clients by sticking to one broken behavior in released kernels instead of two.... Trond says: one immediate consequence of this patch will be that NFSv4.1 clients will now report EIO instead of EREMOTEIO if they hit the problem. That may make debugging a little less obvious. Another consequence will be that if we ever do try to add client side handling of NFS4ERR_REP_TOO_BIG, then we now have to deal with the “handle existing buggy server” syndrome. Reported-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2723c1badd01..18881f34737a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3627,14 +3627,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) op->status = nfsd4_check_resp_size(resp, 0); - if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { - struct nfsd4_slot *slot = resp->cstate.slot; - - if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) - op->status = nfserr_rep_too_big_to_cache; - else - op->status = nfserr_rep_too_big; - } if (so) { so->so_replay.rp_status = op->status; so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); -- cgit v1.2.3 From 0d3f7a2dd2f5cf9642982515e020c1aee2cf7af6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 22 Apr 2014 08:23:58 -0400 Subject: locks: rename file-private locks to "open file description locks" File-private locks have been merged into Linux for v3.15, and *now* people are commenting that the name and macro definitions for the new file-private locks suck. ...and I can't even disagree. The names and command macros do suck. We're going to have to live with these for a long time, so it's important that we be happy with the names before we're stuck with them. The consensus on the lists so far is that they should be rechristened as "open file description locks". The name isn't a big deal for the kernel, but the command macros are not visually distinct enough from the traditional POSIX lock macros. The glibc and documentation folks are recommending that we change them to look like F_OFD_{GETLK|SETLK|SETLKW}. That lessens the chance that a programmer will typo one of the commands wrong, and also makes it easier to spot this difference when reading code. This patch makes the following changes that I think are necessary before v3.15 ships: 1) rename the command macros to their new names. These end up in the uapi headers and so are part of the external-facing API. It turns out that glibc doesn't actually use the fcntl.h uapi header, but it's hard to be sure that something else won't. Changing it now is safest. 
2) make the the /proc/locks output display these as type "OFDLCK" Cc: Michael Kerrisk Cc: Christoph Hellwig Cc: Carlos O'Donell Cc: Stefan Metzmacher Cc: Andy Lutomirski Cc: Frank Filz Cc: Theodore Ts'o Signed-off-by: Jeff Layton --- arch/arm/kernel/sys_oabi-compat.c | 6 +++--- fs/compat.c | 14 +++++++------- fs/fcntl.c | 12 ++++++------ fs/locks.c | 14 +++++++------- include/uapi/asm-generic/fcntl.h | 20 ++++++++++---------- security/selinux/hooks.c | 6 +++--- 6 files changed, 36 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 702bd329d9d0..e90a3148f385 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -203,9 +203,9 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, int ret; switch (cmd) { - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: case F_GETLK64: case F_SETLK64: case F_SETLKW64: diff --git a/fs/compat.c b/fs/compat.c index ca926ad0430c..66d3d3c6b4b2 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: ret = get_compat_flock64(&f, compat_ptr(arg)); if (ret != 0) break; @@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, conv_cmd = convert_fcntl_cmd(cmd); ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); set_fs(old_fs); - if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { + if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { /* need to return lock information - see above for commentary */ if (f.l_start > COMPAT_LOFF_T_MAX) ret = -EOVERFLOW; @@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: return -EINVAL; } return compat_sys_fcntl64(fd, cmd, arg); diff --git a/fs/fcntl.c b/fs/fcntl.c index 9ead1596399a..72c82f69b01b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ - case F_GETLKP: + case F_OFD_GETLK: #endif case F_GETLK: err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ - case F_SETLKP: - case F_SETLKPW: + case F_OFD_SETLK: + case F_OFD_SETLKW: #endif /* Fallthrough */ case F_SETLK: @@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, switch (cmd) { case F_GETLK64: - case F_GETLKP: + case F_OFD_GETLK: err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); break; case F_SETLK64: case F_SETLKW64: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_SETLK: + case F_OFD_SETLKW: err = fcntl_setlk64(fd, f.file, cmd, (struct flock64 __user *) arg); break; diff --git a/fs/locks.c b/fs/locks.c index b380f5543614..e1023b504279 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1941,7 +1941,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) if (error) goto out; - if (cmd == F_GETLKP) { + if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2076,7 +2076,7 @@ again: * FL_FILE_PVT 
flag and override the owner. */ switch (cmd) { - case F_SETLKP: + case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2085,7 +2085,7 @@ again: file_lock->fl_flags |= FL_FILE_PVT; file_lock->fl_owner = (fl_owner_t)filp; break; - case F_SETLKPW: + case F_OFD_SETLKW: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2143,7 +2143,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) if (error) goto out; - if (cmd == F_GETLKP) { + if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2211,7 +2211,7 @@ again: * FL_FILE_PVT flag and override the owner. */ switch (cmd) { - case F_SETLKP: + case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2220,7 +2220,7 @@ again: file_lock->fl_flags |= FL_FILE_PVT; file_lock->fl_owner = (fl_owner_t)filp; break; - case F_SETLKPW: + case F_OFD_SETLKW: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2413,7 +2413,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, if (fl->fl_flags & FL_ACCESS) seq_printf(f, "ACCESS"); else if (IS_FILE_PVT(fl)) - seq_printf(f, "FLPVT "); + seq_printf(f, "OFDLCK"); else seq_printf(f, "POSIX "); diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index a9b13f8b3595..7543b3e51331 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -133,20 +133,20 @@ #endif /* - * fd "private" POSIX locks. + * Open File Description Locks * - * Usually POSIX locks held by a process are released on *any* close and are + * Usually record locks held by a process are released on *any* close and are * not inherited across a fork(). * - * These cmd values will set locks that conflict with normal POSIX locks, but - * are "owned" by the opened file, not the process. This means that they are - * inherited across fork() like BSD (flock) locks, and they are only released - * automatically when the last reference to the the open file against which - * they were acquired is put. + * These cmd values will set locks that conflict with process-associated + * record locks, but are "owned" by the open file description, not the + * process. This means that they are inherited across fork() like BSD (flock) + * locks, and they are only released automatically when the last reference to + * the the open file against which they were acquired is put. */ -#define F_GETLKP 36 -#define F_SETLKP 37 -#define F_SETLKPW 38 +#define F_OFD_GETLK 36 +#define F_OFD_SETLK 37 +#define F_OFD_SETLKW 38 #define F_OWNER_TID 0 #define F_OWNER_PID 1 diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b4beb77967b1..2c7341dbc5d6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3317,9 +3317,9 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, case F_GETLK: case F_SETLK: case F_SETLKW: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: #if BITS_PER_LONG == 32 case F_GETLK64: case F_SETLK64: -- cgit v1.2.3 From cff2fce58b2b0f59089e7edcdc38803d65057b9f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 22 Apr 2014 08:24:32 -0400 Subject: locks: rename FL_FILE_PVT and IS_FILE_PVT to use "*_OFDLCK" instead File-private locks have been re-christened as "open file description" locks. Finish the symbol name cleanup in the internal implementation. 
Signed-off-by: Jeff Layton --- fs/locks.c | 34 +++++++++++++++++----------------- include/linux/fs.h | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index e1023b504279..e663aeac579e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -135,7 +135,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) -#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) +#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) static bool lease_breaking(struct file_lock *fl) { @@ -564,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker, BUG_ON(!list_empty(&waiter->fl_block)); waiter->fl_next = blocker; list_add_tail(&waiter->fl_block, &blocker->fl_block); - if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) + if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); } @@ -759,12 +759,12 @@ EXPORT_SYMBOL(posix_test_lock); * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * - * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. + * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * - * In principle, we could do a more limited deadlock detection on FL_FILE_PVT + * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode. */ @@ -791,9 +791,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, /* * This deadlock detector can't reasonably detect deadlocks with - * FL_FILE_PVT locks, since they aren't owned by a process, per-se. + * FL_OFDLCK locks, since they aren't owned by a process, per-se. */ - if (IS_FILE_PVT(caller_fl)) + if (IS_OFDLCK(caller_fl)) return 0; while ((block_fl = what_owner_is_waiting_for(block_fl))) { @@ -1890,7 +1890,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { - flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via @@ -1912,7 +1912,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { - flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; @@ -1947,7 +1947,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) goto out; cmd = F_GETLK; - file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_flags |= FL_OFDLCK; file_lock.fl_owner = (fl_owner_t)filp; } @@ -2073,7 +2073,7 @@ again: /* * If the cmd is requesting file-private locks, then set the - * FL_FILE_PVT flag and override the owner. + * FL_OFDLCK flag and override the owner. 
*/ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2082,7 +2082,7 @@ again: cmd = F_SETLK; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; break; case F_OFD_SETLKW: @@ -2091,7 +2091,7 @@ again: goto out; cmd = F_SETLKW; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; /* Fallthrough */ case F_SETLKW: @@ -2149,7 +2149,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) goto out; cmd = F_GETLK64; - file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_flags |= FL_OFDLCK; file_lock.fl_owner = (fl_owner_t)filp; } @@ -2208,7 +2208,7 @@ again: /* * If the cmd is requesting file-private locks, then set the - * FL_FILE_PVT flag and override the owner. + * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: @@ -2217,7 +2217,7 @@ again: goto out; cmd = F_SETLK64; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; break; case F_OFD_SETLKW: @@ -2226,7 +2226,7 @@ again: goto out; cmd = F_SETLKW64; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; /* Fallthrough */ case F_SETLKW64: @@ -2412,7 +2412,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, if (IS_POSIX(fl)) { if (fl->fl_flags & FL_ACCESS) seq_printf(f, "ACCESS"); - else if (IS_FILE_PVT(fl)) + else if (IS_OFDLCK(fl)) seq_printf(f, "OFDLCK"); else seq_printf(f, "POSIX "); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a9c5bca2b76..878031227c57 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -815,7 +815,7 @@ static inline struct file *get_file(struct file *f) #define FL_SLEEP 128 /* A blocking lock */ #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ -#define FL_FILE_PVT 1024 /* lock is private to the file */ +#define FL_OFDLCK 1024 /* lock is "owned" by struct file */ /* * Special return value from posix_lock_file() and vfs_lock_file() for -- cgit v1.2.3 From c5f7d0bb29df2e1848a236e58e201daf5b4e0f21 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 15 Apr 2014 10:41:00 +0800 Subject: btrfs: Change the hole range to a more accurate value. Commit 3ac0d7b96a268a98bd474cab8bce3a9f125aaccf fixed the btrfs expanding write problem, but the hole punched is sometimes too large for some iovecs, which have unmapped data ranges. This patch changes the hole range to a more accurate value, using the counts checked by the write check routines. 
Reported-by: Al Viro Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23f6a9d9f104..e7e78fa9085e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1783,7 +1783,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { /* Expand hole size to cover write data, preventing empty gap */ - end_pos = round_up(pos + iov->iov_len, root->sectorsize); + end_pos = round_up(pos + count, root->sectorsize); err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); if (err) { mutex_unlock(&inode->i_mutex); -- cgit v1.2.3 From 3f9e3df8da3c51649c15db249978a10f7374236a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 15 Apr 2014 18:50:17 +0200 Subject: btrfs: replace error code from btrfs_drop_extents There's a case which clone does not handle and used to BUG_ON instead (testcase xfstests/btrfs/035); it now returns EINVAL. This error code is confusing to the ioctl caller, as it normally signifies erroneous arguments. Change it to EOPNOTSUPP, which allows a fallback to copy instead of clone. This does not affect the common reflink operation. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 +++--- fs/btrfs/ioctl.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e7e78fa9085e..1eee3f79d75f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -805,7 +805,7 @@ next_slot: if (start > key.offset && end < extent_end) { BUG_ON(del_nr > 0); if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -851,7 +851,7 @@ next_slot: */ if (start <= key.offset && end < extent_end) { if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -877,7 +877,7 @@ next_slot: if (start > key.offset && end >= extent_end) { BUG_ON(del_nr > 0); if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f2e8fcc279a3..7b001abc73c7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3088,7 +3088,7 @@ process_slot: new_key.offset + datal, 1); if (ret) { - if (ret != -EINVAL) + if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); @@ -3163,7 +3163,7 @@ process_slot: new_key.offset + datal, 1); if (ret) { - if (ret != -EINVAL) + if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); -- cgit v1.2.3 From 9d89ce658718d9d21465666127a15ca2c82c4ea7 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 23 Apr 2014 19:33:33 +0800 Subject: Btrfs: move btrfs_{set,clear}_and_info() to ctree.h Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 14 ++++++++++++++ fs/btrfs/super.c | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad1a5943a923..e7c9e1c540f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args { #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ BTRFS_MOUNT_##opt) +#define btrfs_set_and_info(root, opt, fmt, args...) 
\ +{ \ + if (!btrfs_test_opt(root, opt)) \ + btrfs_info(root->fs_info, fmt, ##args); \ + btrfs_set_opt(root->fs_info->mount_opt, opt); \ +} + +#define btrfs_clear_and_info(root, opt, fmt, args...) \ +{ \ + if (btrfs_test_opt(root, opt)) \ + btrfs_info(root->fs_info, fmt, ##args); \ + btrfs_clear_opt(root->fs_info->mount_opt, opt); \ +} + /* * Inode flags */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 53bc3733d483..363404b9713f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -385,20 +385,6 @@ static match_table_t tokens = { {Opt_err, NULL}, }; -#define btrfs_set_and_info(root, opt, fmt, args...) \ -{ \ - if (!btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_set_opt(root->fs_info->mount_opt, opt); \ -} - -#define btrfs_clear_and_info(root, opt, fmt, args...) \ -{ \ - if (btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_clear_opt(root->fs_info->mount_opt, opt); \ -} - /* * Regular mount options parser. Everything that is needed only when * reading in a new superblock is parsed here. -- cgit v1.2.3 From e60efa84252c059bde5f65fccc6af94478d39e3b Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 23 Apr 2014 19:33:34 +0800 Subject: Btrfs: avoid triggering bug_on() when we fail to start inode caching task When running stress test(including snapshots,balance,fstress), we trigger the following BUG_ON() which is because we fail to start inode caching task. [ 181.131945] kernel BUG at fs/btrfs/inode-map.c:179! [ 181.137963] invalid opcode: 0000 [#1] SMP [ 181.217096] CPU: 11 PID: 2532 Comm: btrfs Not tainted 3.14.0 #1 [ 181.240521] task: ffff88013b621b30 ti: ffff8800b6ada000 task.ti: ffff8800b6ada000 [ 181.367506] Call Trace: [ 181.371107] [] btrfs_return_ino+0x9e/0x110 [btrfs] [ 181.379191] [] btrfs_evict_inode+0x46b/0x4c0 [btrfs] [ 181.387464] [] ? autoremove_wake_function+0x40/0x40 [ 181.395642] [] evict+0x9e/0x190 [ 181.401882] [] iput+0xf3/0x180 [ 181.408025] [] btrfs_orphan_cleanup+0x1ee/0x430 [btrfs] [ 181.416614] [] btrfs_mksubvol.isra.29+0x3bd/0x450 [btrfs] [ 181.425399] [] btrfs_ioctl_snap_create_transid+0x186/0x190 [btrfs] [ 181.435059] [] btrfs_ioctl_snap_create_v2+0xeb/0x130 [btrfs] [ 181.444148] [] btrfs_ioctl+0xf76/0x2b90 [btrfs] [ 181.451971] [] ? handle_mm_fault+0x475/0xe80 [ 181.459509] [] ? __do_page_fault+0x1ec/0x520 [ 181.467046] [] ? do_mmap_pgoff+0x2f5/0x3c0 [ 181.474393] [] do_vfs_ioctl+0x2d8/0x4b0 [ 181.481450] [] SyS_ioctl+0x81/0xa0 [ 181.488021] [] system_call_fastpath+0x16/0x1b We should avoid triggering BUG_ON() here, instead, we output warning messages and clear inode_cache option. 
Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc8ca193d830..8ad529e3e67e 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root) tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", root->root_key.objectid); - BUG_ON(IS_ERR(tsk)); /* -ENOMEM */ + if (IS_ERR(tsk)) { + btrfs_warn(root->fs_info, "failed to start inode caching task"); + btrfs_clear_and_info(root, CHANGE_INODE_CACHE, + "disabling inode map caching"); + } } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) -- cgit v1.2.3 From 28c16cbbc32781224309e50cc99c684f2498bc59 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 23 Apr 2014 19:33:35 +0800 Subject: Btrfs: fix possible memory leaks in open_ctree() Fix possible memory leaks in the following error handling paths: read_tree_block() btrfs_recover_log_trees() btrfs_commit_super() btrfs_find_orphan_roots() btrfs_cleanup_fs_roots() Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6d1ac7d46f81..0e4fb4a5c7f0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2864,7 +2864,7 @@ retry_root_backup: printk(KERN_ERR "BTRFS: failed to read log tree\n"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); - goto fail_trans_kthread; + goto fail_qgroup; } /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); @@ -2873,24 +2873,24 @@ retry_root_backup: "Failed to recover log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); - goto fail_trans_kthread; + goto fail_qgroup; } if (sb->s_flags & MS_RDONLY) { ret = btrfs_commit_super(tree_root); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; } } ret = btrfs_find_orphan_roots(tree_root); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_cleanup_fs_roots(fs_info); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; ret = btrfs_recover_relocation(tree_root); if (ret < 0) { -- cgit v1.2.3 From 1c70d8fb4dfa95bee491816b2a6767b5ca1080e7 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 23 Apr 2014 19:33:36 +0800 Subject: Btrfs: fix inode caching vs tree log Currently, with the inode cache enabled, we reuse an inode's id immediately after unlinking the file, so we may hit something like the following: |->iput inode |->return inode id into inode cache |->create dir,fsync |->power off An easy way to reproduce this problem is: mkfs.btrfs -f /dev/sdb mount /dev/sdb /mnt -o inode_cache,commit=100 dd if=/dev/zero of=/mnt/data bs=1M count=10 oflag=sync inode_id=`ls -i /mnt/data | awk '{print $1}'` rm -f /mnt/data i=1 while [ 1 ] do mkdir /mnt/dir_$i test1=`stat /mnt/dir_$i | grep Inode: | awk '{print $4}'` if [ $test1 -eq $inode_id ] then dd if=/dev/zero of=/mnt/dir_$i/data bs=1M count=1 oflag=sync echo b > /proc/sysrq-trigger fi sleep 1 i=$(($i+1)) done mount /dev/sdb /mnt umount /dev/sdb btrfs check /dev/sdb We fix this problem by adding the unlinked inode's id into the pinned tree, so it cannot be reused until the transaction is committed. 
Cc: stable@vger.kernel.org Signed-off-by: Miao Xie Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 8ad529e3e67e..86935f5ae291 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -209,24 +209,14 @@ again: void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { - struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return; - again: if (root->cached == BTRFS_CACHE_FINISHED) { - __btrfs_add_free_space(ctl, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); } else { - /* - * If we are in the process of caching free ino chunks, - * to avoid adding the same inode number to the free_ino - * tree twice due to cross transaction, we'll leave it - * in the pinned tree until a transaction is committed - * or the caching work is done. - */ - down_write(&root->fs_info->commit_root_sem); spin_lock(&root->cache_lock); if (root->cached == BTRFS_CACHE_FINISHED) { @@ -238,11 +228,7 @@ again: start_caching(root); - if (objectid <= root->cache_progress || - objectid >= root->highest_objectid) - __btrfs_add_free_space(ctl, objectid, 1); - else - __btrfs_add_free_space(pinned, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); up_write(&root->fs_info->commit_root_sem); } -- cgit v1.2.3 From 9ce49a0b4ff7f13961d8d106ffae959823d2e758 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 24 Apr 2014 15:15:28 +0100 Subject: Btrfs: use correct key when repeating search for extent item If skinny metadata is enabled and our first tree search fails to find a skinny extent item, we may repeat a tree search for a "fat" extent item (if the previous item in the leaf is not the "fat" extent we're looking for). However we were not setting the new key's objectid to the right value, as we previously used the same key variable to peek at the previous item in the leaf, which has a different objectid. So just set the right objectid to avoid modifying/deleting a wrong item if we repeat the tree search. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1306487c82cf..678cb352902c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1542,6 +1542,7 @@ again: ret = 0; } if (ret) { + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_release_path(path); @@ -5719,6 +5720,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret > 0 && skinny_metadata) { skinny_metadata = false; + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_release_path(path); -- cgit v1.2.3 From f8213bdc89719bad895a02c62c4a85066ff76720 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 24 Apr 2014 15:15:29 +0100 Subject: Btrfs: correctly set profile flags on seqlock retry If we had to retry on the profiles seqlock (due to a concurrent write), we would set bits on the input flags that corresponded both to the current profile and to previous values of the profile. 
Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 678cb352902c..5590af92094b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3543,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return extended_to_chunk(flags | tmp); } -static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) +static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags) { unsigned seq; + u64 flags; do { + flags = orig_flags; seq = read_seqbegin(&root->fs_info->profiles_lock); if (flags & BTRFS_BLOCK_GROUP_DATA) -- cgit v1.2.3 From c1befb885939cdaaf420c10bbe9ff57aa00446ea Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Thu, 17 Apr 2014 17:52:10 +0800 Subject: kernfs: fix a subdir count leak Currently kernfs_link_sibling() increments parent->dir.subdirs before adding the node into the parent's children rb tree. Because kernfs_link_sibling() may fail to find a suitable slot and bail out, this can lead to a mismatch between the elevated subdir count and the actual number of children nodes. This patch fixes the problem by moving the subdir accounting after the actual addition has happened. Signed-off-by: Jianyu Zhan Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 78f3403300af..ac127cd008bf 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -232,9 +232,6 @@ static int kernfs_link_sibling(struct kernfs_node *kn) struct rb_node **node = &kn->parent->dir.children.rb_node; struct rb_node *parent = NULL; - if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs++; - while (*node) { struct kernfs_node *pos; int result; @@ -249,9 +246,15 @@ static int kernfs_link_sibling(struct kernfs_node *kn) else return -EEXIST; } + /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); rb_insert_color(&kn->rb, &kn->parent->dir.children); + + /* successfully added, account subdir number */ + if (kernfs_type(kn) == KERNFS_DIR) + kn->parent->dir.subdirs++; + return 0; } -- cgit v1.2.3 From b44b2140265ddfde03acbe809336111d31adb0d1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 20 Apr 2014 08:29:21 -0400 Subject: kernfs: add back missing error check in kernfs_fop_mmap() While updating how mmap enabled kernfs files are handled by lockdep, 9b2db6e18945 ("sysfs: bail early from kernfs_file_mmap() to avoid spurious lockdep warning") inadvertently dropped the error return check from kernfs_file_mmap(). The intention was just to drop the "if (ops->mmap)" check, as control won't reach that point if the mmap callback isn't implemented, but I mistakenly removed the error return check together with it. This led to an Xorg crash on i810, which was reported and bisected to the commit and then to the specific change by Tobias. 
Signed-off-by: Tejun Heo Reported-and-bisected-by: Tobias Powalowski Tested-by: Tobias Powalowski References: http://lkml.kernel.org/g/533D01BD.1010200@googlemail.com Cc: stable # 3.14 Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8034706a7af8..e01ea4a14a01 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -484,6 +484,8 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) ops = kernfs_ops(of->kn); rc = ops->mmap(of, vma); + if (rc) + goto out_put; /* * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() -- cgit v1.2.3 From cfd4a535b68faf651b238586011f5bae128391c4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 26 Apr 2014 05:02:03 -0700 Subject: Btrfs: limit the path size in send to PATH_MAX fs_path_ensure_buf is used to make sure our path buffers for send are big enough for the path names as we construct them. The buffer size is limited to 32K by the length field in the struct. But bugs in the path construction can end up trying to build a huge buffer, and we'll do invalid memmoves when the buffer length field wraps. This patch is step one, preventing the overflows. Signed-off-by: Chris Mason --- fs/btrfs/send.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1ac3ca98c429..eb6537a08c1b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) if (p->buf_len >= len) return 0; + if (len > PATH_MAX) { + WARN_ON(1); + return -ENOMEM; + } + path_len = p->end - p->start; old_buf_len = p->buf_len; -- cgit v1.2.3
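
For context, the "open file description locks" commits earlier in this series rename the user-facing fcntl() commands to F_OFD_GETLK/F_OFD_SETLK/F_OFD_SETLKW. Below is a minimal, hypothetical userspace sketch (not part of any patch above) of taking a whole-file write lock with the renamed F_OFD_SETLKW command, following the semantics described in the commit messages and the uapi comment: l_pid must be 0, the lock is owned by the open file description rather than the process, it is inherited across fork() like an flock() lock, and it is released only when the last reference to the open file is put. The demo path and the _GNU_SOURCE feature macro are assumptions, not taken from the patches.

#define _GNU_SOURCE		/* assumption: needed for F_OFD_* with a recent glibc */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct flock fl;
	/* hypothetical demo file, not referenced by the patches */
	int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;	/* exclusive (write) lock */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;		/* 0 means "to end of file" */
	fl.l_pid = 0;		/* the OFD commands require l_pid == 0 */

	/* blocking variant; F_OFD_SETLK would return an error on conflict instead */
	if (fcntl(fd, F_OFD_SETLKW, &fl) == -1) {
		perror("fcntl(F_OFD_SETLKW)");
		close(fd);
		return 1;
	}

	/*
	 * The lock belongs to this open file description: it survives
	 * fork() and is dropped only when the last descriptor referring
	 * to this open file is closed.
	 */
	close(fd);
	return 0;
}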