From 5eebde23223aeb0ad2d9e3be6590ff8bbfab0fc2 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Thu, 23 Sep 2010 08:55:58 -0400 Subject: nfs: introduce mount option '-olocal_lock' to make locks local NFS clients since 2.6.12 support flock locks by emulating fcntl byte-range locks. Due to this, some windows applications which seem to use both flock (share mode lock mapped as flock by Samba) and fcntl locks sequentially on the same file, can't lock as they falsely assume the file is already locked. The problem was reported on a setup with windows clients accessing excel files on a Samba exported share which is originally a NFS mount from a NetApp filer. Older NFS clients (< 2.6.12) did not see this problem as flock locks were considered local. To support legacy flock behavior, this patch adds a mount option "-olocal_lock=" which can take the following values: 'none' - Neither flock locks nor POSIX locks are local 'flock' - flock locks are local 'posix' - fcntl/POSIX locks are local 'all' - Both flock locks and POSIX locks are local Testing: - This patch was tested by using -olocal_lock option with different values and the NLM calls were noted from the network packet captured. 'none' - NLM calls were seen during both flock() and fcntl(), flock lock was granted, fcntl was denied 'flock' - no NLM calls for flock(), NLM call was seen for fcntl(), granted 'posix' - NLM call was seen for flock() - granted, no NLM call for fcntl() 'all' - no NLM calls were seen during both flock() and fcntl() - No bugs were seen during NFSv4 locking/unlocking in general and NFSv4 reboot recovery. Cc: Neil Brown Signed-off-by: Suresh Jayaraman Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index eb51bd6201da..59cbe1ba0511 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -684,7 +684,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, return ret; } -static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status = 0; @@ -699,7 +700,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) if (nfs_have_delegation(inode, FMODE_READ)) goto out_noconflict; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + if (is_local) goto out_noconflict; status = NFS_PROTO(inode)->lock(filp, cmd, fl); @@ -730,7 +731,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) return res; } -static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status; @@ -745,15 +747,19 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. */ - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + /* + * Use local locking if mounted with "-onolock" or with appropriate + * "-olocal_lock=" + */ + if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); return status; } -static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status; @@ -766,8 +772,11 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) if (status != 0) goto out; - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + /* + * Use local locking if mounted with "-onolock" or with appropriate + * "-olocal_lock=" + */ + if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); @@ -791,6 +800,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; + int is_local = 0; dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", filp->f_path.dentry->d_parent->d_name.name, @@ -804,6 +814,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) goto out_err; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL) + is_local = 1; + if (NFS_PROTO(inode)->lock_check_bounds != NULL) { ret = NFS_PROTO(inode)->lock_check_bounds(fl); if (ret < 0) @@ -811,11 +824,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) } if (IS_GETLK(cmd)) - ret = do_getlk(filp, cmd, fl); + ret = do_getlk(filp, cmd, fl, is_local); else if (fl->fl_type == F_UNLCK) - ret = do_unlk(filp, cmd, fl); + ret = do_unlk(filp, cmd, fl, is_local); else - ret = do_setlk(filp, cmd, fl); + ret = do_setlk(filp, cmd, fl, is_local); out_err: return ret; } @@ -825,6 +838,9 @@ out_err: */ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { + struct inode *inode = filp->f_mapping->host; + int is_local = 0; + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, @@ -833,14 +849,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) + is_local = 1; + /* We're simulating flock() locks using posix locks on the server */ fl->fl_owner = (fl_owner_t)filp; fl->fl_start = 0; fl->fl_end = OFFSET_MAX; if (fl->fl_type == F_UNLCK) - return do_unlk(filp, cmd, fl); - return do_setlk(filp, cmd, fl); + return do_unlk(filp, cmd, fl, is_local); + return do_setlk(filp, cmd, fl, is_local); } /* -- cgit v1.2.3 From bc4866b6e0b44f8ea0df22a16e5927714beb4983 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Oct 2010 17:59:08 -0400 Subject: NFS: Don't SIGBUS if nfs_vm_page_mkwrite races with a cache invalidation In the case where we lock the page, and then find out that the page has been thrown out of the page cache, we should just return VM_FAULT_NOPAGE. This is what block_page_mkwrite() does in these situations. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/file.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 59cbe1ba0511..39672b731736 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -551,7 +551,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct file *filp = vma->vm_file; struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; - int ret = -EINVAL; + int ret = VM_FAULT_NOPAGE; struct address_space *mapping; dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", @@ -567,21 +567,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (mapping != dentry->d_inode->i_mapping) goto out_unlock; - ret = 0; pagelen = nfs_page_length(page); if (pagelen == 0) goto out_unlock; - ret = nfs_flush_incompatible(filp, page); - if (ret != 0) - goto out_unlock; + ret = VM_FAULT_LOCKED; + if (nfs_flush_incompatible(filp, page) == 0 && + nfs_updatepage(filp, page, 0, pagelen) == 0) + goto out; - ret = nfs_updatepage(filp, page, 0, pagelen); + ret = VM_FAULT_SIGBUS; out_unlock: - if (!ret) - return VM_FAULT_LOCKED; unlock_page(page); - return VM_FAULT_SIGBUS; +out: + return ret; } static const struct vm_operations_struct nfs_file_vm_ops = { -- cgit v1.2.3 From 6b96724e507fecc3e6440e86426fe4f44359ed66 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Tue, 12 Oct 2010 16:30:05 -0700 Subject: Revalidate caches on lock Instead of blindly zapping the caches, attempt to revalidate them if the server has indicated that it uses high resolution timestamps. NFSv4 should be able to always revalidate the cache since the protocol requires the update of the change attribute on modification of the data. In reality, there are servers (the Linux NFS server for example) that do not obey this requirement and use ctime as the basis for change attribute. Long term, the server needs to be fixed. At this time, and to be on the safe side, continue zapping caches if the server indicates that it does not have a high resolution timestamp. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/file.c | 19 ++++++++++++++++--- fs/nfs/nfs3xdr.c | 3 ++- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 22 insertions(+), 4 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index da2f2f024a4d..a63bce8d0596 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -915,6 +915,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * server->maxfilesize = fsinfo->maxfilesize; + server->time_delta = fsinfo->time_delta; + /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 39672b731736..c3f2477c16c1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -757,6 +757,11 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) return status; } +static int +is_time_granular(struct timespec *ts) { + return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); +} + static int do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -781,13 +786,21 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) status = do_vfs_lock(filp, fl); if (status < 0) goto out; + /* - * Make sure we clear the cache whenever we try to get the lock. + * Revalidate the cache if the server has time stamps granular + * enough to detect subsecond changes. Otherwise, clear the + * cache to prevent missing any changes. + * * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!nfs_have_delegation(inode, FMODE_READ)) - nfs_zap_caches(inode); + if (!nfs_have_delegation(inode, FMODE_READ)) { + if (is_time_granular(&NFS_SERVER(inode)->time_delta)) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + nfs_zap_caches(inode); + } out: return status; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 31a44df40aea..d9a5e832c257 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1044,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) res->wtmult = ntohl(*p++); res->dtpref = ntohl(*p++); p = xdr_decode_hyper(p, &res->maxfilesize); + p = xdr_decode_time3(p, &res->time_delta); - /* ignore time_delta and properties */ + /* ignore properties */ res->lease_time = 0; return 0; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c82ee7cd6288..5eef862ec187 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_server { struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ + struct timespec time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ dev_t s_dev; /* superblock dev numbers */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index efe2eab8ac94..da7a1300dc60 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -112,6 +112,7 @@ struct nfs_fsinfo { __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; + struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ }; -- cgit v1.2.3