summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Jeff Layton <jlayton@redhat.com> 2014-02-03 12:13:10 -0500
committer Jeff Layton <jlayton@redhat.com> 2014-03-31 08:24:43 -0400
commit 5d50ffd7c31dab47c6b828841ca1ec70a1b40169 (patch)
tree 59e96edd1c263f82012387fe7b6f290db4fb8416 /fs
parent 57b65325fe34ec4c917bc4e555144b4a94d9e1f7 (diff)
download linux-5d50ffd7c31dab47c6b828841ca1ec70a1b40169.tar.bz2
locks: add new fcntl cmd values for handling file private locks
Due to some unfortunate history, POSIX locks have very strange and unhelpful semantics. The thing that usually catches people by surprise is that they are dropped whenever the process closes any file descriptor associated with the inode. This is extremely problematic for people developing file servers that need to implement byte-range locks. Developers often need a "lock management" facility to ensure that file descriptors are not closed until all of the locks associated with the inode are finished. Additionally, "classic" POSIX locks are owned by the process. Locks taken between threads within the same process won't conflict with one another, which renders them useless for synchronization between threads. This patchset adds a new type of lock that attempts to address these issues. These locks conflict with classic POSIX read/write locks, but have semantics that are more like BSD locks with respect to inheritance and behavior on close. This is implemented primarily by changing how the fl_owner field is set for these locks. Instead of having them owned by the files_struct of the process, they are instead owned by the filp on which they were acquired. Thus, they are inherited across fork() and are only released when the last reference to a filp is put. These new semantics prevent them from being merged with classic POSIX locks, even if they are acquired by the same process. These locks will also conflict with classic POSIX locks even if they are acquired by the same process or on the same file descriptor. The new locks are managed using a new set of cmd values to the fcntl() syscall. The initial implementation of this converts these values to "classic" cmd values at a fairly high level, and the details are not exposed to the underlying filesystem. We may eventually want to push this handling out to the lower filesystem code but for now I don't see any need for it. 
Also, note that with this implementation the new cmd values are only available via fcntl64() on 32-bit arches. There's little need to add support for legacy apps on a new interface like this. Signed-off-by: Jeff Layton <jlayton@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/compat.c 35
-rw-r--r-- fs/fcntl.c 35
-rw-r--r-- fs/locks.c 54
3 files changed, 104 insertions, 20 deletions
diff --git a/fs/compat.c b/fs/compat.c
index 6af20de2c1a3..f340dcf11f68 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u
}
#endif
+static unsigned int
+convert_fcntl_cmd(unsigned int cmd)
+{
+ switch (cmd) {
+ case F_GETLK64:
+ return F_GETLK;
+ case F_SETLK64:
+ return F_SETLK;
+ case F_SETLKW64:
+ return F_SETLKW;
+ }
+
+ return cmd;
+}
+
asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
mm_segment_t old_fs;
struct flock f;
long ret;
+ unsigned int conv_cmd;
switch (cmd) {
case F_GETLK:
@@ -441,16 +457,18 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
case F_GETLK64:
case F_SETLK64:
case F_SETLKW64:
+ case F_GETLKP:
+ case F_SETLKP:
+ case F_SETLKPW:
ret = get_compat_flock64(&f, compat_ptr(arg));
if (ret != 0)
break;
old_fs = get_fs();
set_fs(KERNEL_DS);
- ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK :
- ((cmd == F_SETLK64) ? F_SETLK : F_SETLKW),
- (unsigned long)&f);
+ conv_cmd = convert_fcntl_cmd(cmd);
+ ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
set_fs(old_fs);
- if (cmd == F_GETLK64 && ret == 0) {
+ if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
/* need to return lock information - see above for commentary */
if (f.l_start > COMPAT_LOFF_T_MAX)
ret = -EOVERFLOW;
@@ -471,8 +489,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
- if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64))
+ switch (cmd) {
+ case F_GETLK64:
+ case F_SETLK64:
+ case F_SETLKW64:
+ case F_GETLKP:
+ case F_SETLKP:
+ case F_SETLKPW:
return -EINVAL;
+ }
return compat_sys_fcntl64(fd, cmd, arg);
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 7ef7f2d2b608..9ead1596399a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_SETFL:
err = setfl(fd, filp, arg);
break;
+#if BITS_PER_LONG != 32
+ /* 32-bit arches must use fcntl64() */
+ case F_GETLKP:
+#endif
case F_GETLK:
err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
break;
+#if BITS_PER_LONG != 32
+ /* 32-bit arches must use fcntl64() */
+ case F_SETLKP:
+ case F_SETLKPW:
+#endif
+ /* Fallthrough */
case F_SETLK:
case F_SETLKW:
err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
@@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
goto out1;
switch (cmd) {
- case F_GETLK64:
- err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
- break;
- case F_SETLK64:
- case F_SETLKW64:
- err = fcntl_setlk64(fd, f.file, cmd,
- (struct flock64 __user *) arg);
- break;
- default:
- err = do_fcntl(fd, cmd, arg, f.file);
- break;
+ case F_GETLK64:
+ case F_GETLKP:
+ err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
+ break;
+ case F_SETLK64:
+ case F_SETLKW64:
+ case F_SETLKP:
+ case F_SETLKPW:
+ err = fcntl_setlk64(fd, f.file, cmd,
+ (struct flock64 __user *) arg);
+ break;
+ default:
+ err = do_fcntl(fd, cmd, arg, f.file);
+ break;
}
out1:
fdput(f);
diff --git a/fs/locks.c b/fs/locks.c
index ed9fb769b88e..3b54b98236ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1930,6 +1930,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
if (error)
goto out;
+ if (cmd == F_GETLKP) {
+ cmd = F_GETLK;
+ file_lock.fl_flags |= FL_FILE_PVT;
+ file_lock.fl_owner = (fl_owner_t)filp;
+ }
+
error = vfs_test_lock(filp, &file_lock);
if (error)
goto out;
@@ -2049,10 +2055,26 @@ again:
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
- if (cmd == F_SETLKW) {
+
+ /*
+ * If the cmd is requesting file-private locks, then set the
+ * FL_FILE_PVT flag and override the owner.
+ */
+ switch (cmd) {
+ case F_SETLKP:
+ cmd = F_SETLK;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ break;
+ case F_SETLKPW:
+ cmd = F_SETLKW;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ /* Fallthrough */
+ case F_SETLKW:
file_lock->fl_flags |= FL_SLEEP;
}
-
+
error = do_lock_file_wait(filp, cmd, file_lock);
/*
@@ -2098,6 +2120,12 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
if (error)
goto out;
+ if (cmd == F_GETLKP) {
+ cmd = F_GETLK64;
+ file_lock.fl_flags |= FL_FILE_PVT;
+ file_lock.fl_owner = (fl_owner_t)filp;
+ }
+
error = vfs_test_lock(filp, &file_lock);
if (error)
goto out;
@@ -2150,10 +2178,26 @@ again:
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
- if (cmd == F_SETLKW64) {
+
+ /*
+ * If the cmd is requesting file-private locks, then set the
+ * FL_FILE_PVT flag and override the owner.
+ */
+ switch (cmd) {
+ case F_SETLKP:
+ cmd = F_SETLK64;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ break;
+ case F_SETLKPW:
+ cmd = F_SETLKW64;
+ file_lock->fl_flags |= FL_FILE_PVT;
+ file_lock->fl_owner = (fl_owner_t)filp;
+ /* Fallthrough */
+ case F_SETLKW64:
file_lock->fl_flags |= FL_SLEEP;
}
-
+
error = do_lock_file_wait(filp, cmd, file_lock);
/*
@@ -2221,6 +2265,8 @@ void locks_remove_file(struct file *filp)
if (!inode->i_flock)
return;
+ locks_remove_posix(filp, (fl_owner_t)filp);
+
if (filp->f_op->flock) {
struct file_lock fl = {
.fl_pid = current->tgid,