From 63c6764ce4c650245a41a95a2235207d25ca4fde Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Fri, 14 Oct 2005 15:59:11 -0700 Subject: [PATCH] nommu build error fix "proc_smaps_operations" is not defined in case of "CONFIG_MMU=n". Signed-off-by: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b33f94020db..a170450aadb1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -103,7 +103,9 @@ enum pid_directory_inos { PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, +#ifdef CONFIG_MMU PROC_TGID_SMAPS, +#endif #ifdef CONFIG_SCHEDSTATS PROC_TGID_SCHEDSTAT, #endif @@ -141,7 +143,9 @@ enum pid_directory_inos { PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, +#ifdef CONFIG_MMU PROC_TID_SMAPS, +#endif #ifdef CONFIG_SCHEDSTATS PROC_TID_SCHEDSTAT, #endif @@ -195,7 +199,9 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -235,7 +241,9 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -630,6 +638,7 @@ static struct file_operations proc_numa_maps_operations = { }; #endif +#ifdef CONFIG_MMU extern struct seq_operations proc_pid_smaps_op; static int smaps_open(struct inode *inode, struct file *file) { @@ -648,6 +657,7 @@ static struct file_operations proc_smaps_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) @@ -1681,10 +1691,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; +#ifdef CONFIG_MMU case PROC_TID_SMAPS: case PROC_TGID_SMAPS: inode->i_fop = &proc_smaps_operations; break; +#endif #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; -- cgit v1.2.3 From b3c52da33ce95747b1bff86cce716d4f1397f14a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2005 06:02:00 -0400 Subject: [PATCH] NFS: Fix cache consistency races If the data cache has been marked as potentially invalid by nfs_refresh_inode, we should invalidate it rather than assume that changes are due to our own activity. Also ensure that we always start with a valid cache before declaring it to be protected by a delegation. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/delegation.c | 4 ++++ fs/nfs/file.c | 3 ++- fs/nfs/inode.c | 7 ++----- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d7f7eb669d03..4a36839f0bbd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -85,6 +85,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *delegation; int status = 0; + /* Ensure we first revalidate the attributes and page cache! */ + if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + delegation = nfs_alloc_delegation(); if (delegation == NULL) return -ENOMEM; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6b9eda925c5..6bdcfa95de94 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -137,7 +137,8 @@ static int nfs_revalidate_file(struct inode *inode, struct file *filp) struct nfs_inode *nfsi = NFS_I(inode); int retval = 0; - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)) + || nfs_attribute_timeout(inode)) retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); nfs_revalidate_mapping(inode, filp->f_mapping); return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6922469d6fc5..6be46d21c01e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1226,10 +1226,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_size, new_isize; int data_unstable; - /* Do we hold a delegation? */ - if (nfs_have_delegation(inode, FMODE_READ)) - return 0; - spin_lock(&inode->i_lock); /* Are we in the process of updating data on the server? */ @@ -1350,7 +1346,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign nfsi->read_cache_jiffies = fattr->timestamp; /* Are we racing with known updates of the metadata on the server? */ - data_unstable = ! nfs_verify_change_attribute(inode, verifier); + data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || + (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)); /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); -- cgit v1.2.3 From 6ce969171d5187f7621be68c0ebbc7fb02ec53f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2005 06:03:23 -0400 Subject: [PATCH] NFS: Fix Oopsable/unnecessary i_count manipulations in nfs_wait_on_inode() Oopsable since nfs_wait_on_inode() can get called as part of iput_final(). Unnecessary since the caller had better be damned sure that the inode won't disappear from underneath it anyway. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6be46d21c01e..d4eadeea128e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -877,12 +877,10 @@ static int nfs_wait_on_inode(struct inode *inode) sigset_t oldmask; int error; - atomic_inc(&inode->i_count); rpc_clnt_sigmask(clnt, &oldmask); error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, nfs_wait_schedule, TASK_INTERRUPTIBLE); rpc_clnt_sigunmask(clnt, &oldmask); - iput(inode); return error; } -- cgit v1.2.3 From b65574fec5db1211bce7fc8bec7a2b32486e0670 Mon Sep 17 00:00:00 2001 From: David McCullough Date: Mon, 17 Oct 2005 16:43:29 -0700 Subject: [PATCH] output of /proc/maps on nommu systems is incomplete Currently you do not get all the map entries on nommu systems because the start function doesn't index into the list using the value of "pos". Signed-off-by: David McCullough Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/nommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index f3bf016d5ee3..cff10ab1af63 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -91,6 +91,7 @@ static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) next = _rb; break; } + pos--; } return next; -- cgit v1.2.3 From 4faa5285283fad081443e3612ca426a311bb6c7e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2005 16:43:33 -0700 Subject: [PATCH] aio: revert lock_kiocb() lock_kiocb() was introduced to serialize retrying and cancellation. In the process of doing so it tried to sleep waiting for KIF_LOCKED while holding the ctx_lock spinlock. Recent fixes have ensured that multiple concurrent retries won't be attempted for a given iocb. Cancel has other problems and has no significant in-tree users that have been complaining about it. So for the immediate future we'll revert sleeping with the lock held and will address proper cancellation and retry serialization in the future. Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 26 +------------------------- include/linux/aio.h | 7 ++++++- 2 files changed, 7 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index d6b1551342b7..9fe7216457d8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -398,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; - req->ki_flags = 1 << KIF_LOCKED; + req->ki_flags = 0; req->ki_users = 2; req->ki_key = 0; req->ki_ctx = ctx; @@ -547,25 +547,6 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) return ioctx; } -static int lock_kiocb_action(void *param) -{ - schedule(); - return 0; -} - -static inline void lock_kiocb(struct kiocb *iocb) -{ - wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, - TASK_UNINTERRUPTIBLE); -} - -static inline void unlock_kiocb(struct kiocb *iocb) -{ - kiocbClearLocked(iocb); - smp_mb__after_clear_bit(); - wake_up_bit(&iocb->ki_flags, KIF_LOCKED); -} - /* * use_mm * Makes the calling kernel thread take on the specified @@ -796,9 +777,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) * Hold an extra reference while retrying i/o. */ iocb->ki_users++; /* grab extra reference */ - lock_kiocb(iocb); aio_run_iocb(iocb); - unlock_kiocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); } @@ -1542,7 +1521,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, spin_lock_irq(&ctx->ctx_lock); aio_run_iocb(req); - unlock_kiocb(req); if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) @@ -1674,7 +1652,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (NULL != cancel) { struct io_event tmp; pr_debug("calling cancel\n"); - lock_kiocb(kiocb); memset(&tmp, 0, sizeof(tmp)); tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; tmp.data = kiocb->ki_user_data; @@ -1686,7 +1663,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (copy_to_user(result, &tmp, sizeof(tmp))) ret = -EFAULT; } - unlock_kiocb(kiocb); } else ret = -EINVAL; diff --git a/include/linux/aio.h b/include/linux/aio.h index 60def658b246..0decf66117c1 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -24,7 +24,12 @@ struct kioctx; #define KIOCB_SYNC_KEY (~0U) /* ki_flags bits */ -#define KIF_LOCKED 0 +/* + * This may be used for cancel/retry serialization in the future, but + * for now it's unused and we probably don't want modules to even + * think they can use it. + */ +/* #define KIF_LOCKED 0 */ #define KIF_KICKED 1 #define KIF_CANCELLED 2 -- cgit v1.2.3 From cee54fc944422c44e476736c045a9e8053cb0644 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:12 -0700 Subject: NFSv4: Add functions to order RPC calls NFSv4 file state-changing functions such as OPEN, CLOSE, LOCK,... are all labelled with "sequence identifiers" in order to prevent the server from reordering RPC requests, as this could cause its file state to become out of sync with the client. Currently the NFS client code enforces this ordering locally using semaphores to restrict access to structures until the RPC call is done. This, of course, only works with synchronous RPC calls, since the user process must first grab the semaphore. By dropping semaphores, and instead teaching the RPC engine to hold the RPC calls until they are ready to be sent, we can extend this process to work nicely with asynchronous RPC calls too. This patch adds a new list called "rpc_sequence" that defines the order of the RPC calls to be sent. We add one such list for each state_owner. When an RPC call is ready to be sent, it checks if it is top of the rpc_sequence list. If so, it proceeds. If not, it goes back to sleep, and loops until it hits top of the list. Once the RPC call has completed, it can then bump the sequence id counter, and remove itself from the rpc_sequence list, and then wake up the next sleeper. Note that the state_owner sequence ids and lock_owner sequence ids are all indexed to the same rpc_sequence list, so OPEN, LOCK,... requests are all ordered w.r.t. each other. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 42 ++++++++++++-- fs/nfs/nfs4proc.c | 111 +++++++++++++++++++++++------------- fs/nfs/nfs4state.c | 145 +++++++++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4xdr.c | 43 +++++++++++--- include/linux/nfs_xdr.h | 15 ++--- 5 files changed, 273 insertions(+), 83 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ec1a22d7b876..6ac6708484f5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -92,6 +92,35 @@ struct nfs4_client { unsigned char cl_id_uniquifier; }; +/* + * struct rpc_sequence ensures that RPC calls are sent in the exact + * order that they appear on the list. + */ +struct rpc_sequence { + struct rpc_wait_queue wait; /* RPC call delay queue */ + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ +}; + +#define NFS_SEQID_CONFIRMED 1 +struct nfs_seqid_counter { + struct rpc_sequence *sequence; + int flags; + u32 counter; +}; + +struct nfs_seqid { + struct list_head list; + struct nfs_seqid_counter *sequence; + struct rpc_task *task; +}; + +static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) +{ + if (seqid_mutating_err(-status)) + seqid->flags |= NFS_SEQID_CONFIRMED; +} + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only @@ -106,12 +135,13 @@ struct nfs4_state_owner { struct nfs4_client *so_client; u32 so_id; /* 32-bit identifier, unique */ struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ atomic_t so_count; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; struct list_head so_delegations; + struct nfs_seqid_counter so_seqid; + struct rpc_sequence so_sequence; }; /* @@ -132,7 +162,7 @@ struct nfs4_lock_state { fl_owner_t ls_owner; /* POSIX lock owner */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; - u32 ls_seqid; + struct nfs_seqid_counter ls_seqid; u32 ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; @@ -224,12 +254,16 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern void nfs4_schedule_state_recovery(struct nfs4_client *); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); +extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); +extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); +extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_free_seqid(struct nfs_seqid *seqid); + extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9701ca8c9428..9ba89e7cdd28 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -218,7 +218,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs_openargs o_arg = { .fh = NFS_FH(inode), - .seqid = sp->so_seqid, .id = sp->so_id, .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, @@ -245,8 +244,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st } o_arg.u.delegation_type = delegation->type; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, o_arg.seqid); if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); if (o_res.delegation_type != 0) { @@ -256,6 +260,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st nfs_async_inode_return_delegation(inode, &o_res.stateid); } } + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); /* Ensure we update the inode attributes */ NFS_CACHEINV(inode); @@ -307,16 +312,20 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state goto out; if (state->state == 0) goto out; - arg.seqid = sp->so_seqid; + arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (arg.seqid == NULL) + goto out; arg.open_flags = state->state; memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, arg.seqid); if (status >= 0) { memcpy(state->stateid.data, res.stateid.data, sizeof(state->stateid.data)); clear_bit(NFS_DELEGATED_STATE, &state->flags); } + nfs_free_seqid(arg.seqid); out: up(&sp->so_sema); dput(parent); @@ -345,11 +354,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) { struct nfs_open_confirmargs arg = { .fh = fh, - .seqid = sp->so_seqid, + .seqid = seqid, .stateid = *stateid, }; struct nfs_open_confirmres res; @@ -362,7 +371,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf int status; status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, seqid); if (status >= 0) memcpy(stateid, &res.stateid, sizeof(*stateid)); return status; @@ -380,21 +391,21 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru int status; /* Update sequence id. The caller must serialize! */ - o_arg->seqid = sp->so_seqid; o_arg->id = sp->so_id; o_arg->clientid = sp->so_client->cl_clientid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; update_changeattr(dir, &o_res->cinfo); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(server->client, &o_res->fh, - sp, &o_res->stateid); + sp, &o_res->stateid, o_arg->seqid); if (status != 0) goto out; } + nfs_confirm_seqid(&sp->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); out: @@ -465,6 +476,10 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st set_bit(NFS_DELEGATED_STATE, &state->flags); goto out; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (o_arg.seqid == NULL) + goto out; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; @@ -490,6 +505,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); } out_nodeleg: + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); out: dput(parent); @@ -667,6 +683,9 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st /* Serialization for the sequence id */ down(&sp->so_sema); + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_err; @@ -681,6 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st update_open_stateid(state, &o_res.stateid, flags); if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); + nfs_free_seqid(o_arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); @@ -690,6 +710,7 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); + nfs_free_seqid(o_arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); } @@ -718,7 +739,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * It is actually a sign of a bug on the client or on the server. * * If we receive a BAD_SEQID error in the particular case of - * doing an OPEN, we assume that nfs4_increment_seqid() will + * doing an OPEN, we assume that nfs_increment_open_seqid() will * have unhashed the old state_owner for us, and that we can * therefore safely retry using a new one. We should still warn * the user though... @@ -799,7 +820,7 @@ static void nfs4_close_done(struct rpc_task *task) /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ - nfs4_increment_seqid(task->tk_status, sp); + nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(&state->stateid, &calldata->res.stateid, @@ -818,6 +839,7 @@ static void nfs4_close_done(struct rpc_task *task) } state->state = calldata->arg.open_flags; nfs4_put_open_state(state); + nfs_free_seqid(calldata->arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&server->nfs4_state->cl_sem); @@ -865,7 +887,11 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) calldata->state = state; calldata->arg.fh = NFS_FH(inode); /* Serialization for the sequence id */ - calldata->arg.seqid = state->owner->so_seqid; + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); + if (calldata->arg.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } calldata->arg.open_flags = mode; memcpy(&calldata->arg.stateid, &state->stateid, sizeof(calldata->arg.stateid)); @@ -2729,15 +2755,19 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock /* We might have lost the locks! */ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) goto out; - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + status = -ENOMEM; + if (luargs.seqid == NULL) + goto out; + memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data)); arg.u.locku = &luargs; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); + nfs_increment_lock_seqid(status, luargs.seqid); if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, - sizeof(lsp->ls_stateid)); + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); + nfs_free_seqid(luargs.seqid); out: up(&state->lock_sema); if (status == 0) @@ -2783,9 +2813,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r .reclaim = reclaim, .new_lock_owner = 0, }; - int status; + struct nfs_seqid *lock_seqid; + int status = -ENOMEM; - if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { + lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (lock_seqid == NULL) + return -ENOMEM; + if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { .lock_owner = { @@ -2793,39 +2827,40 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }, }; - otl.lock_seqid = lsp->ls_seqid; + otl.lock_seqid = lock_seqid; otl.lock_owner.id = lsp->ls_id; memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); largs.u.open_lock = &otl; largs.new_lock_owner = 1; arg.u.lock = &largs; down(&owner->so_sema); - otl.open_seqid = owner->so_seqid; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment open_owner seqid on success, and - * seqid mutating errors */ - nfs4_increment_seqid(status, owner); - up(&owner->so_sema); - if (status == 0) { - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - lsp->ls_seqid++; + otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (otl.open_seqid != NULL) { + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and seqid mutating errors */ + nfs_increment_open_seqid(status, otl.open_seqid); + nfs_free_seqid(otl.open_seqid); } + up(&owner->so_sema); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } else { - struct nfs_exist_lock el = { - .seqid = lsp->ls_seqid, - }; + struct nfs_exist_lock el; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = ⪙ arg.u.lock = &largs; + el.seqid = lock_seqid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); } + /* increment seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, lock_seqid); /* save the returned stateid. */ - if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - else if (status == -NFS4ERR_DENIED) + if (status == 0) { + memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data)); + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; + nfs_free_seqid(lock_seqid); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afe587d82f1e..f535c219cf3a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kmalloc(sizeof(*sp),GFP_KERNEL); + sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; init_MUTEX(&sp->so_sema); - sp->so_seqid = 0; /* arbitrary */ INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); return sp; } @@ -553,12 +556,10 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; - lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) return NULL; - lsp->ls_flags = 0; - lsp->ls_seqid = 0; /* arbitrary */ - memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + lsp->ls_seqid.sequence = &state->owner->so_sequence; atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); @@ -673,29 +674,102 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f nfs4_put_lock_state(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) +{ + struct rpc_sequence *sequence = counter->sequence; + struct nfs_seqid *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new != NULL) { + new->sequence = counter; + new->task = NULL; + spin_lock(&sequence->lock); + list_add_tail(&new->list, &sequence->list); + spin_unlock(&sequence->lock); + } + return new; +} + +void nfs_free_seqid(struct nfs_seqid *seqid) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; + struct rpc_sequence *sequence = seqid->sequence->sequence; + struct rpc_task *next = NULL; + + spin_lock(&sequence->lock); + list_del(&seqid->list); + if (!list_empty(&sequence->list)) { + next = list_entry(sequence->list.next, struct nfs_seqid, list)->task; + if (next) + rpc_wake_up_task(next); + } + spin_unlock(&sequence->lock); + kfree(seqid); } /* -* Called with sp->so_sema and clp->cl_sem held. -* -* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or -* failed with a seqid incrementing error - -* see comments nfs_fs.h:seqid_mutating_error() -*/ -void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) -{ - if (status == NFS_OK || seqid_mutating_err(-status)) - sp->so_seqid++; - /* If the server returns BAD_SEQID, unhash state_owner here */ - if (status == -NFS4ERR_BAD_SEQID) + * Called with sp->so_sema and clp->cl_sem held. + * + * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +{ + switch (status) { + case 0: + break; + case -NFS4ERR_BAD_SEQID: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_BADXDR: + case -NFS4ERR_RESOURCE: + case -NFS4ERR_NOFILEHANDLE: + /* Non-seqid mutating errors */ + return; + }; + /* + * Note: no locking needed as we are guaranteed to be first + * on the sequence list + */ + seqid->sequence->counter++; +} + +void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) +{ + if (status == -NFS4ERR_BAD_SEQID) { + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); + } + return nfs_increment_seqid(status, seqid); +} + +/* + * Called with ls->lock_sema and clp->cl_sem held. + * + * Increment the seqid if the LOCK/LOCKU succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) +{ + return nfs_increment_seqid(status, seqid); +} + +int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + int status = 0; + + spin_lock(&sequence->lock); + if (sequence->list.next != &seqid->list) { + seqid->task = task; + rpc_sleep_on(&sequence->wait, task, NULL, NULL); + status = -EAGAIN; + } + spin_unlock(&sequence->lock); + return status; } static int reclaimer(void *); @@ -791,8 +865,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n if (state->state == 0) continue; status = ops->recover_open(sp, state); - list_for_each_entry(lock, &state->lock_states, ls_locks) - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; if (status >= 0) { status = nfs4_reclaim_locks(ops, state); if (status < 0) @@ -831,6 +903,26 @@ out_err: return status; } +static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state; + struct nfs4_lock_state *lock; + + /* Reset all sequence ids to zero */ + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + sp->so_seqid.counter = 0; + sp->so_seqid.flags = 0; + list_for_each_entry(state, &sp->so_states, open_states) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.counter = 0; + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } + } + } +} + static int reclaimer(void *ptr) { struct reclaimer_args *args = (struct reclaimer_args *)ptr; @@ -864,6 +956,7 @@ restart_loop: default: ops = &nfs4_network_partition_recovery_ops; }; + nfs4_state_mark_reclaim(clp); status = __nfs4_init_client(clp); if (status) goto out_error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6c564ef9489e..fcd28a29a2f8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -604,7 +604,7 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_CLOSE); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); return 0; @@ -732,9 +732,9 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) struct nfs_open_to_lock *ol = opargs->u.open_lock; RESERVE_SPACE(40); - WRITE32(ol->open_seqid); + WRITE32(ol->open_seqid->sequence->counter); WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); - WRITE32(ol->lock_seqid); + WRITE32(ol->lock_seqid->sequence->counter); WRITE64(ol->lock_owner.clientid); WRITE32(4); WRITE32(ol->lock_owner.id); @@ -744,7 +744,7 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) RESERVE_SPACE(20); WRITEMEM(&el->stateid, sizeof(el->stateid)); - WRITE32(el->seqid); + WRITE32(el->seqid->sequence->counter); } return 0; @@ -775,7 +775,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) RESERVE_SPACE(44); WRITE32(OP_LOCKU); WRITE32(arg->type); - WRITE32(opargs->seqid); + WRITE32(opargs->seqid->sequence->counter); WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); WRITE64(arg->offset); WRITE64(arg->length); @@ -826,7 +826,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena */ RESERVE_SPACE(8); WRITE32(OP_OPEN); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); RESERVE_SPACE(16); WRITE64(arg->clientid); @@ -941,7 +941,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_CONFIRM); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); return 0; } @@ -953,7 +953,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_DOWNGRADE); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; } @@ -1416,6 +1416,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1437,6 +1440,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1464,6 +1470,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1485,6 +1494,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1506,6 +1518,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1525,8 +1540,17 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka struct compound_hdr hdr = { .nops = 2, }; + struct nfs_lock_opargs *opargs = args->u.lock; + struct nfs_seqid *seqid; int status; + if (opargs->new_lock_owner) + seqid = opargs->u.open_lock->lock_seqid; + else + seqid = opargs->u.exist_lock->seqid; + status = nfs_wait_on_sequence(seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1569,6 +1593,9 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock }; int status; + status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a2bf6914ff1b..d578912bf9a9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -96,12 +96,13 @@ struct nfs4_change_info { u64 after; }; +struct nfs_seqid; /* * Arguments to the open call. */ struct nfs_openargs { const struct nfs_fh * fh; - __u32 seqid; + struct nfs_seqid * seqid; int open_flags; __u64 clientid; __u32 id; @@ -136,7 +137,7 @@ struct nfs_openres { struct nfs_open_confirmargs { const struct nfs_fh * fh; nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; }; struct nfs_open_confirmres { @@ -149,7 +150,7 @@ struct nfs_open_confirmres { struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; int open_flags; }; @@ -165,15 +166,15 @@ struct nfs_lowner { }; struct nfs_open_to_lock { - __u32 open_seqid; + struct nfs_seqid * open_seqid; nfs4_stateid open_stateid; - __u32 lock_seqid; + struct nfs_seqid * lock_seqid; struct nfs_lowner lock_owner; }; struct nfs_exist_lock { nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; }; struct nfs_lock_opargs { @@ -186,7 +187,7 @@ struct nfs_lock_opargs { }; struct nfs_locku_opargs { - __u32 seqid; + struct nfs_seqid * seqid; nfs4_stateid stateid; }; -- cgit v1.2.3 From 9512135df14f8293b9bc5e8fb22d4279dee5ff66 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:12 -0700 Subject: NFSv4: Fix a potential CLOSE race Once the state_owner and lock_owner semaphores get removed, it will be possible for other OPEN requests to reopen the same file if they have lower sequence ids than our CLOSE call. This patch ensures that we recheck the file state once nfs_wait_on_sequence() has completed waiting. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 110 +++++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 6 ++- fs/nfs/nfs4xdr.c | 14 ++---- include/linux/nfs_xdr.h | 2 +- 4 files changed, 87 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9ba89e7cdd28..5154ddf6d9a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -189,6 +189,21 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf nfsi->change_attr = cinfo->after; } +/* Helper for asynchronous RPC calls */ +static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, + rpc_action tk_exit, void *calldata) +{ + struct rpc_task *task; + + if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) + return -ENOMEM; + + task->tk_calldata = calldata; + task->tk_action = tk_begin; + rpc_execute(task); + return 0; +} + static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { struct inode *inode = state->inode; @@ -810,11 +825,24 @@ struct nfs4_closedata { struct nfs_closeres res; }; +static void nfs4_free_closedata(struct nfs4_closedata *calldata) +{ + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + nfs4_put_open_state(calldata->state); + nfs_free_seqid(calldata->arg.seqid); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&server->nfs4_state->cl_sem); + kfree(calldata); +} + static void nfs4_close_done(struct rpc_task *task) { struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; struct nfs4_state *state = calldata->state; - struct nfs4_state_owner *sp = state->owner; struct nfs_server *server = NFS_SERVER(calldata->inode); /* hmm. we are done with the inode, and in the process of freeing @@ -838,25 +866,46 @@ static void nfs4_close_done(struct rpc_task *task) } } state->state = calldata->arg.open_flags; - nfs4_put_open_state(state); - nfs_free_seqid(calldata->arg.seqid); - up(&sp->so_sema); - nfs4_put_state_owner(sp); - up_read(&server->nfs4_state->cl_sem); - kfree(calldata); + nfs4_free_closedata(calldata); } -static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +static void nfs4_close_begin(struct rpc_task *task) { + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &calldata->arg, .rpc_resp = &calldata->res, - .rpc_cred = calldata->state->owner->so_cred, + .rpc_cred = state->owner->so_cred, }; - if (calldata->arg.open_flags != 0) + int mode = 0; + int status; + + status = nfs_wait_on_sequence(calldata->arg.seqid, task); + if (status != 0) + return; + /* Don't reorder reads */ + smp_rmb(); + /* Recalculate the new open mode in case someone reopened the file + * while we were waiting in line to be scheduled. + */ + if (state->nreaders != 0) + mode |= FMODE_READ; + if (state->nwriters != 0) + mode |= FMODE_WRITE; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + state->state = mode; + if (mode == state->state) { + nfs4_free_closedata(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + if (mode != 0) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); + calldata->arg.open_flags = mode; + rpc_call_setup(task, &msg, 0); } /* @@ -873,35 +922,30 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata * int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) { struct nfs4_closedata *calldata; - int status; + int status = -ENOMEM; - /* Tell caller we're done */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - state->state = mode; - return 0; - } - calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) - return -ENOMEM; + goto out; calldata->inode = inode; calldata->state = state; calldata->arg.fh = NFS_FH(inode); + calldata->arg.stateid = &state->stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); - if (calldata->arg.seqid == NULL) { - kfree(calldata); - return -ENOMEM; - } - calldata->arg.open_flags = mode; - memcpy(&calldata->arg.stateid, &state->stateid, - sizeof(calldata->arg.stateid)); - status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); - /* - * Return -EINPROGRESS on success in order to indicate to the - * caller that an asynchronous RPC call has been launched, and - * that it will release the semaphores on completion. - */ - return (status == 0) ? -EINPROGRESS : status; + if (calldata->arg.seqid == NULL) + goto out_free_calldata; + + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin, + nfs4_close_done, calldata); + if (status == 0) + goto out; + + nfs_free_seqid(calldata->arg.seqid); +out_free_calldata: + kfree(calldata); +out: + return status; } struct inode * diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f535c219cf3a..59c93f37e1b2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -518,7 +518,11 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) newstate |= FMODE_WRITE; if (state->state == newstate) goto out; - if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + state->state = newstate; + goto out; + } + if (nfs4_do_close(inode, state, newstate) == 0) return; } out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fcd28a29a2f8..934ec50ea6bf 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -602,10 +602,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); return 0; } @@ -950,9 +950,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_OPEN_DOWNGRADE); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; @@ -1416,9 +1416,6 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1518,9 +1515,6 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d578912bf9a9..cac0df950c66 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -149,7 +149,7 @@ struct nfs_open_confirmres { */ struct nfs_closeargs { struct nfs_fh * fh; - nfs4_stateid stateid; + nfs4_stateid * stateid; struct nfs_seqid * seqid; int open_flags; }; -- cgit v1.2.3 From e6dfa553cffcb9740f932311dff42f81d6ac63bb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:13 -0700 Subject: NFSv4: Remove obsolete state_owner and lock_owner semaphores OPEN, CLOSE, etc no longer need these semaphores to ensure ordering of requests. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 6 ------ fs/nfs/nfs4proc.c | 18 ------------------ fs/nfs/nfs4state.c | 15 ++++----------- 3 files changed, 4 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6ac6708484f5..d4fcb5d0ce6c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -125,16 +125,11 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. - * - * The ->so_sema is held during all state_owner seqid-mutating operations: - * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize - * so_seqid. */ struct nfs4_state_owner { struct list_head so_list; /* per-clientid list of state_owners */ struct nfs4_client *so_client; u32 so_id; /* 32-bit identifier, unique */ - struct semaphore so_sema; atomic_t so_count; struct rpc_cred *so_cred; /* Associated cred */ @@ -183,7 +178,6 @@ struct nfs4_state { struct inode *inode; /* Pointer to the inode */ unsigned long flags; /* Do we hold any locks? */ - struct semaphore lock_sema; /* Serializes file locking operations */ spinlock_t state_lock; /* Protects the lock_states list */ nfs4_stateid stateid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5154ddf6d9a5..25bab6032dfe 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -224,7 +224,6 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, /* * OPEN_RECLAIM: * reclaim state on the server after a reboot. - * Assumes caller is holding the sp->so_sem */ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { @@ -322,7 +321,6 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state }; int status = 0; - down(&sp->so_sema); if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) goto out; if (state->state == 0) @@ -342,7 +340,6 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state } nfs_free_seqid(arg.seqid); out: - up(&sp->so_sema); dput(parent); return status; } @@ -595,7 +592,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); goto out_err; } - down(&sp->so_sema); state = nfs4_get_open_state(inode, sp); if (state == NULL) goto out_err; @@ -620,7 +616,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred set_bit(NFS_DELEGATED_STATE, &state->flags); update_open_stateid(state, &delegation->stateid, open_flags); out_ok: - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&nfsi->rwsem); up_read(&clp->cl_sem); @@ -631,7 +626,6 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); - up(&sp->so_sema); nfs4_put_state_owner(sp); } up_read(&nfsi->rwsem); @@ -696,7 +690,6 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st } else o_arg.u.attrs = sattr; /* Serialization for the sequence id */ - down(&sp->so_sema); o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (o_arg.seqid == NULL) @@ -716,7 +709,6 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); nfs_free_seqid(o_arg.seqid); - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; @@ -726,7 +718,6 @@ out_err: if (state != NULL) nfs4_put_open_state(state); nfs_free_seqid(o_arg.seqid); - up(&sp->so_sema); nfs4_put_state_owner(sp); } /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ @@ -833,7 +824,6 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&server->nfs4_state->cl_sem); kfree(calldata); @@ -2702,7 +2692,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2729,7 +2718,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = 0; } out: - up(&state->lock_sema); up_read(&clp->cl_sem); return status; } @@ -2791,7 +2779,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock int status; down_read(&clp->cl_sem); - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2813,7 +2800,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock sizeof(lsp->ls_stateid.data)); nfs_free_seqid(luargs.seqid); out: - up(&state->lock_sema); if (status == 0) do_vfs_lock(request->fl_file, request); up_read(&clp->cl_sem); @@ -2877,7 +2863,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r largs.u.open_lock = &otl; largs.new_lock_owner = 1; arg.u.lock = &largs; - down(&owner->so_sema); otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); if (otl.open_seqid != NULL) { status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); @@ -2885,7 +2870,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r nfs_increment_open_seqid(status, otl.open_seqid); nfs_free_seqid(otl.open_seqid); } - up(&owner->so_sema); if (status == 0) nfs_confirm_seqid(&lsp->ls_seqid, 0); } else { @@ -2944,11 +2928,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock int status; down_read(&clp->cl_sem); - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status == 0) status = _nfs4_do_setlk(state, cmd, request, 0); - up(&state->lock_sema); if (status == 0) { /* Note: we always want to sleep here! */ request->fl_flags |= FL_SLEEP; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 59c93f37e1b2..86c08c165ce7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -267,7 +267,6 @@ nfs4_alloc_state_owner(void) sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; - init_MUTEX(&sp->so_sema); INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); @@ -362,7 +361,6 @@ nfs4_alloc_open_state(void) memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); - init_MUTEX(&state->lock_sema); spin_lock_init(&state->state_lock); return state; } @@ -444,7 +442,6 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) state = __nfs4_find_state_byowner(inode, owner); if (state == NULL && new != NULL) { state = new; - /* Caller *must* be holding owner->so_sem */ /* Note: The reclaim code dictates that we add stateless * and read-only stateids to the end of the list */ list_add_tail(&state->open_states, &owner->so_states); @@ -464,7 +461,7 @@ out: /* * Beware! Caller must be holding exactly one - * reference to clp->cl_sem and owner->so_sema! + * reference to clp->cl_sem! */ void nfs4_put_open_state(struct nfs4_state *state) { @@ -485,7 +482,6 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Beware! Caller must be holding no references to clp->cl_sem! - * of owner->so_sema! */ void nfs4_close_state(struct nfs4_state *state, mode_t mode) { @@ -496,7 +492,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) atomic_inc(&owner->so_count); down_read(&clp->cl_sem); - down(&owner->so_sema); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); if (mode & FMODE_READ) @@ -527,7 +522,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) } out: nfs4_put_open_state(state); - up(&owner->so_sema); nfs4_put_state_owner(owner); up_read(&clp->cl_sem); } @@ -553,7 +547,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema */ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { @@ -577,7 +570,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema and clp->cl_sem + * The caller must be holding clp->cl_sem */ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { @@ -711,7 +704,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) } /* - * Called with sp->so_sema and clp->cl_sem held. + * Called with clp->cl_sem held. * * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - @@ -750,7 +743,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) } /* - * Called with ls->lock_sema and clp->cl_sem held. + * Called with clp->cl_sem held. * * Increment the seqid if the LOCK/LOCKU succeeded, or * failed with a seqid incrementing error - -- cgit v1.2.3 From 83c9d41e456033000ccfadf535f3098d8739488d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:13 -0700 Subject: NFSv4: Remove nfs4_client->cl_sem from close() path We no longer need to worry about collisions between close() and the state recovery code, since the new close will automatically recheck the file state once it is done waiting on its sequence slot. Ditto for the nfs4_proc_locku() procedure. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ----- fs/nfs/nfs4state.c | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 25bab6032dfe..611052bacaff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -820,12 +820,10 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata) { struct nfs4_state *state = calldata->state; struct nfs4_state_owner *sp = state->owner; - struct nfs_server *server = NFS_SERVER(calldata->inode); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - up_read(&server->nfs4_state->cl_sem); kfree(calldata); } @@ -2758,7 +2756,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2778,7 +2775,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock struct nfs_locku_opargs luargs; int status; - down_read(&clp->cl_sem); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2802,7 +2798,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock out: if (status == 0) do_vfs_lock(request->fl_file, request); - up_read(&clp->cl_sem); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 86c08c165ce7..bb3574361958 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -481,17 +481,15 @@ void nfs4_put_open_state(struct nfs4_state *state) } /* - * Beware! Caller must be holding no references to clp->cl_sem! + * Close the current file. */ void nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - struct nfs4_client *clp = owner->so_client; int newstate; atomic_inc(&owner->so_count); - down_read(&clp->cl_sem); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); if (mode & FMODE_READ) @@ -523,7 +521,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) out: nfs4_put_open_state(state); nfs4_put_state_owner(owner); - up_read(&clp->cl_sem); } /* @@ -704,8 +701,6 @@ void nfs_free_seqid(struct nfs_seqid *seqid) } /* - * Called with clp->cl_sem held. - * * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() @@ -743,8 +738,6 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) } /* - * Called with clp->cl_sem held. - * * Increment the seqid if the LOCK/LOCKU succeeded, or * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() -- cgit v1.2.3 From 0a8838f972883112f0a7b259141b24db17583c2d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:14 -0700 Subject: NFSv4: Add missing handling of OPEN_CONFIRM requests on CLAIM_DELEGATE_CUR. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 611052bacaff..f57dba815099 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -56,6 +56,7 @@ #define NFS4_POLL_RETRY_MIN (1*HZ) #define NFS4_POLL_RETRY_MAX (15*HZ) +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); @@ -333,11 +334,21 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); nfs_increment_open_seqid(status, arg.seqid); + if (status != 0) + goto out_free; + if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode), + sp, &res.stateid, arg.seqid); + if (status != 0) + goto out_free; + } + nfs_confirm_seqid(&sp->so_seqid, 0); if (status >= 0) { memcpy(state->stateid.data, res.stateid.data, sizeof(state->stateid.data)); clear_bit(NFS_DELEGATED_STATE, &state->flags); } +out_free: nfs_free_seqid(arg.seqid); out: dput(parent); @@ -366,7 +377,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) { struct nfs_open_confirmargs arg = { .fh = fh, -- cgit v1.2.3 From faf5f49c2d9c0af2847837c232a432cc146e203b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:15 -0700 Subject: NFSv4: Make NFS clean up byte range locks asynchronously Currently we fail to do so if the process was signalled. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 161 +++++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 9 +-- include/linux/nfs_xdr.h | 2 +- 5 files changed, 116 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d4fcb5d0ce6c..2215cdee43ae 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -249,6 +249,7 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f57dba815099..612a9a14aed3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -58,9 +58,9 @@ static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -2422,7 +2422,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen } static int -nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { struct nfs4_client *clp = server->nfs4_state; @@ -2500,7 +2500,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs4_client *clp = server->nfs4_state; int ret = errorcode; @@ -2763,68 +2763,127 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) return res; } -static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +struct nfs4_unlockdata { + struct nfs_lockargs arg; + struct nfs_locku_opargs luargs; + struct nfs_lockres res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + atomic_t refcount; + struct completion completion; +}; + +static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) { - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - }; - struct nfs_lockres res = { - .server = server, - }; + if (atomic_dec_and_test(&calldata->refcount)) { + nfs_free_seqid(calldata->luargs.seqid); + nfs4_put_lock_state(calldata->lsp); + put_nfs_open_context(calldata->ctx); + kfree(calldata); + } +} + +static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) +{ + complete(&calldata->completion); + nfs4_locku_release_calldata(calldata); +} + +static void nfs4_locku_done(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + + nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); + switch (task->tk_status) { + case 0: + memcpy(calldata->lsp->ls_stateid.data, + calldata->res.u.stateid.data, + sizeof(calldata->lsp->ls_stateid.data)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + nfs4_locku_complete(calldata); +} + +static void nfs4_locku_begin(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = state->owner->so_cred, + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->lsp->ls_state->owner->so_cred, }; + int status; + + status = nfs_wait_on_sequence(calldata->luargs.seqid, task); + if (status != 0) + return; + if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + nfs4_locku_complete(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + rpc_call_setup(task, &msg, 0); +} + +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_unlockdata *calldata; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp; - struct nfs_locku_opargs luargs; int status; - + status = nfs4_set_lock_state(state, request); if (status != 0) - goto out; + return status; lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! */ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) - goto out; - luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); - status = -ENOMEM; - if (luargs.seqid == NULL) - goto out; - memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs_increment_lock_seqid(status, luargs.seqid); - - if (status == 0) - memcpy(lsp->ls_stateid.data, res.u.stateid.data, - sizeof(lsp->ls_stateid.data)); - nfs_free_seqid(luargs.seqid); -out: + return 0; + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (calldata->luargs.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } + calldata->luargs.stateid = &lsp->ls_stateid; + calldata->arg.fh = NFS_FH(inode); + calldata->arg.type = nfs4_lck_type(cmd, request); + calldata->arg.offset = request->fl_start; + calldata->arg.length = nfs4_lck_length(request); + calldata->arg.u.locku = &calldata->luargs; + calldata->res.server = server; + calldata->lsp = lsp; + atomic_inc(&lsp->ls_count); + + /* Ensure we don't close file until we're done freeing locks! */ + calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); + + atomic_set(&calldata->refcount, 2); + init_completion(&calldata->completion); + + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, + nfs4_locku_done, calldata); if (status == 0) - do_vfs_lock(request->fl_file, request); + wait_for_completion_interruptible(&calldata->completion); + do_vfs_lock(request->fl_file, request); + nfs4_locku_release_calldata(calldata); return status; } -static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_unlck(state, cmd, request), - &exception); - } while (exception.retry); - return err; -} - static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) { struct inode *inode = state->inode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bb3574361958..23834c8fb740 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -600,7 +600,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ * Release reference to lock_state, and free it if we see that * it is no longer in use */ -static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) +void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { struct nfs4_state *state; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 934ec50ea6bf..4706192cfb07 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -776,7 +776,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) WRITE32(OP_LOCKU); WRITE32(arg->type); WRITE32(opargs->seqid->sequence->counter); - WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data)); WRITE64(arg->offset); WRITE64(arg->length); @@ -1587,9 +1587,6 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock }; int status; - status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -2934,8 +2931,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - READ_BUF(sizeof(nfs4_stateid)); - COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + READ_BUF(sizeof(res->u.stateid.data)); + COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); } return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cac0df950c66..849f95c5fae4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,7 +188,7 @@ struct nfs_lock_opargs { struct nfs_locku_opargs { struct nfs_seqid * seqid; - nfs4_stateid stateid; + nfs4_stateid * stateid; }; struct nfs_lockargs { -- cgit v1.2.3 From 06735b3454824bd561decbde46111f144e905923 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:15 -0700 Subject: NFSv4: Fix up handling of open_to_lock sequence ids Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 69 +++++++++++++++++++++++-------------------------- fs/nfs/nfs4xdr.c | 32 ++++++++++------------- include/linux/nfs_xdr.h | 19 +++----------- 3 files changed, 49 insertions(+), 71 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 612a9a14aed3..35da15342e05 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2889,11 +2889,23 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_lock_opargs largs = { + .lock_stateid = &lsp->ls_stateid, + .open_stateid = &state->stateid, + .lock_owner = { + .clientid = server->nfs4_state->cl_clientid, + .id = lsp->ls_id, + }, + .reclaim = reclaim, + }; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), .offset = request->fl_start, .length = nfs4_lck_length(request), + .u = { + .lock = &largs, + }, }; struct nfs_lockres res = { .server = server, @@ -2904,56 +2916,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; - struct nfs_lock_opargs largs = { - .reclaim = reclaim, - .new_lock_owner = 0, - }; - struct nfs_seqid *lock_seqid; int status = -ENOMEM; - lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (lock_seqid == NULL) + largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (largs.lock_seqid == NULL) return -ENOMEM; if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; - struct nfs_open_to_lock otl = { - .lock_owner = { - .clientid = server->nfs4_state->cl_clientid, - }, - }; - - otl.lock_seqid = lock_seqid; - otl.lock_owner.id = lsp->ls_id; - memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); - largs.u.open_lock = &otl; + + largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (largs.open_seqid == NULL) + goto out; largs.new_lock_owner = 1; - arg.u.lock = &largs; - otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); - if (otl.open_seqid != NULL) { - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and seqid mutating errors */ - nfs_increment_open_seqid(status, otl.open_seqid); - nfs_free_seqid(otl.open_seqid); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment open seqid on success, and seqid mutating errors */ + if (largs.new_lock_owner != 0) { + nfs_increment_open_seqid(status, largs.open_seqid); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } - if (status == 0) - nfs_confirm_seqid(&lsp->ls_seqid, 0); - } else { - struct nfs_exist_lock el; - memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); - largs.u.exist_lock = ⪙ - arg.u.lock = &largs; - el.seqid = lock_seqid; + nfs_free_seqid(largs.open_seqid); + } else status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - } - /* increment seqid on success, and seqid mutating errors*/ - nfs_increment_lock_seqid(status, lock_seqid); + /* increment lock seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, largs.lock_seqid); /* save the returned stateid. */ if (status == 0) { - memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data)); + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); lsp->ls_flags |= NFS_LOCK_INITIALIZED; } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs_free_seqid(lock_seqid); +out: + nfs_free_seqid(largs.lock_seqid); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4706192cfb07..c5c75235c5b8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -729,22 +729,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) WRITE64(arg->length); WRITE32(opargs->new_lock_owner); if (opargs->new_lock_owner){ - struct nfs_open_to_lock *ol = opargs->u.open_lock; - RESERVE_SPACE(40); - WRITE32(ol->open_seqid->sequence->counter); - WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); - WRITE32(ol->lock_seqid->sequence->counter); - WRITE64(ol->lock_owner.clientid); + WRITE32(opargs->open_seqid->sequence->counter); + WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data)); + WRITE32(opargs->lock_seqid->sequence->counter); + WRITE64(opargs->lock_owner.clientid); WRITE32(4); - WRITE32(ol->lock_owner.id); + WRITE32(opargs->lock_owner.id); } else { - struct nfs_exist_lock *el = opargs->u.exist_lock; - RESERVE_SPACE(20); - WRITEMEM(&el->stateid, sizeof(el->stateid)); - WRITE32(el->seqid->sequence->counter); + WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data)); + WRITE32(opargs->lock_seqid->sequence->counter); } return 0; @@ -1535,16 +1531,14 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka .nops = 2, }; struct nfs_lock_opargs *opargs = args->u.lock; - struct nfs_seqid *seqid; int status; - if (opargs->new_lock_owner) - seqid = opargs->u.open_lock->lock_seqid; - else - seqid = opargs->u.exist_lock->seqid; - status = nfs_wait_on_sequence(seqid, req->rq_task); + status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task); if (status != 0) goto out; + /* Do we need to do an open_to_lock_owner? */ + if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) + opargs->new_lock_owner = 0; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -2908,8 +2902,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(nfs4_stateid)); - COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + READ_BUF(sizeof(res->u.stateid.data)); + COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); } else if (status == -NFS4ERR_DENIED) return decode_lock_denied(xdr, &res->u.denied); return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 849f95c5fae4..57efcc27f20b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -165,25 +165,14 @@ struct nfs_lowner { u32 id; }; -struct nfs_open_to_lock { - struct nfs_seqid * open_seqid; - nfs4_stateid open_stateid; +struct nfs_lock_opargs { struct nfs_seqid * lock_seqid; + nfs4_stateid * lock_stateid; + struct nfs_seqid * open_seqid; + nfs4_stateid * open_stateid; struct nfs_lowner lock_owner; -}; - -struct nfs_exist_lock { - nfs4_stateid stateid; - struct nfs_seqid * seqid; -}; - -struct nfs_lock_opargs { __u32 reclaim; __u32 new_lock_owner; - union { - struct nfs_open_to_lock *open_lock; - struct nfs_exist_lock *exist_lock; - } u; }; struct nfs_locku_opargs { -- cgit v1.2.3 From 039c4d7a82d8268ec71f59679460b41d0dd9b225 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:16 -0700 Subject: NFS: Fix up a race in the NFS implementation of GETLK ...and fix a memory corruption bug due to improper use of memcpy() on a struct file_lock. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6bdcfa95de94..572d8593486f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -376,22 +376,31 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { + struct file_lock *cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - else { - struct file_lock *cfl = posix_test_lock(filp, fl); - - fl->fl_type = F_UNLCK; - if (cfl != NULL) - memcpy(fl, cfl, sizeof(*fl)); + /* Try local locking first */ + cfl = posix_test_lock(filp, fl); + if (cfl != NULL) { + locks_copy_lock(fl, cfl); + goto out; } + + if (nfs_have_delegation(inode, FMODE_READ)) + goto out_noconflict; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + goto out_noconflict; + + status = NFS_PROTO(inode)->lock(filp, cmd, fl); +out: unlock_kernel(); return status; +out_noconflict: + fl->fl_type = F_UNLCK; + goto out; } static int do_vfs_lock(struct file *file, struct file_lock *fl) -- cgit v1.2.3 From 834f2a4a1554dc5b2598038b3fe8703defcbe467 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:16 -0700 Subject: VFS: Allow the filesystem to return a full file pointer on open intent This is needed by NFSv4 for atomicity reasons: our open command is in fact a lookup+open, so we need to be able to propagate open context information from lookup() into the resulting struct file's private_data field. Signed-off-by: Trond Myklebust --- fs/exec.c | 12 +++---- fs/namei.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++---- fs/open.c | 79 +++++++++++++++++++++++++++++++++++-------- include/linux/namei.h | 8 +++++ 4 files changed, 165 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index a04a575ad433..d2208f7c87db 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library) struct nameidata nd; int error; - nd.intent.open.flags = FMODE_READ; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); if (error) goto out; @@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library) if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library) out: return error; exit: + release_open_intent(&nd); path_release(&nd); goto out; } @@ -490,8 +490,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - nd.intent.open.flags = FMODE_READ; - err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); file = ERR_PTR(err); if (!err) { @@ -504,7 +503,7 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -516,6 +515,7 @@ out: return file; } } + release_open_intent(&nd); path_release(&nd); } goto out; diff --git a/fs/namei.c b/fs/namei.c index aa62dbda93ac..0d1dff7d3d95 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd) mntput_no_expire(nd->mnt); } +/** + * release_open_intent - free up open intent resources + * @nd: pointer to nameidata + */ +void release_open_intent(struct nameidata *nd) +{ + if (nd->intent.open.file->f_dentry == NULL) + put_filp(nd->intent.open.file); + else + fput(nd->intent.open.file); +} + /* * Internal lookup() using the new generic dcache. * SMP-safe @@ -1052,6 +1065,70 @@ out: return retval; } +static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + struct file *filp = get_empty_filp(); + int err; + + if (filp == NULL) + return -ENFILE; + nd->intent.open.file = filp; + nd->intent.open.flags = open_flags; + nd->intent.open.create_mode = create_mode; + err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); + if (IS_ERR(nd->intent.open.file)) { + if (err == 0) { + err = PTR_ERR(nd->intent.open.file); + path_release(nd); + } + } else if (err != 0) + release_open_intent(nd); + return err; +} + +/** + * path_lookup_open - lookup a file path with open intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + */ +int path_lookup_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + return __path_lookup_intent_open(name, lookup_flags, nd, + open_flags, 0); +} + +/** + * path_lookup_create - lookup a file path with open + create intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + * @create_mode: create intent flags + */ +int path_lookup_create(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, + open_flags, create_mode); +} + +int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + char *tmp = getname(name); + int err = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); + putname(tmp); + } + return err; +} + /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. Doesn't follow mounts. @@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) */ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) { - int acc_mode, error = 0; + int acc_mode, error; struct path path; struct dentry *dir; int count = 0; acc_mode = ACC_MODE(flag); + /* O_TRUNC implies we need access checks for write permissions */ + if (flag & O_TRUNC) + acc_mode |= MAY_WRITE; + /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flag & O_APPEND) acc_mode |= MAY_APPEND; - /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; - /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); + error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); if (error) return error; goto ok; @@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) /* * Create - we need to know the parent. */ - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); if (error) return error; @@ -1520,6 +1597,8 @@ ok: exit_dput: dput_path(&path, nd); exit: + if (!IS_ERR(nd->intent.open.file)) + release_open_intent(nd); path_release(nd); return error; diff --git a/fs/open.c b/fs/open.c index f0d90cf0495c..8d06ec911fd9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct file *f) + int flags, struct file *f, + int (*open)(struct inode *, struct file *)) { struct inode *inode; int error; @@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_op = fops_get(inode->i_fop); file_move(f, &inode->i_sb->s_files); - if (f->f_op && f->f_op->open) { - error = f->f_op->open(inode,f); + if (!open && f->f_op) + open = f->f_op->open; + if (open) { + error = open(inode, f); if (error) goto cleanup_all; } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); @@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode) { int namei_flags, error; struct nameidata nd; - struct file *f; namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) namei_flags++; - if (namei_flags & O_TRUNC) - namei_flags |= 2; - - error = -ENFILE; - f = get_empty_filp(); - if (f == NULL) - return ERR_PTR(error); error = open_namei(filename, namei_flags, mode, &nd); if (!error) - return __dentry_open(nd.dentry, nd.mnt, flags, f); + return nameidata_to_filp(&nd, flags); - put_filp(f); return ERR_PTR(error); } EXPORT_SYMBOL(filp_open); +/** + * lookup_instantiate_filp - instantiates the open intent filp + * @nd: pointer to nameidata + * @dentry: pointer to dentry + * @open: open callback + * + * Helper for filesystems that want to use lookup open intents and pass back + * a fully instantiated struct file to the caller. + * This function is meant to be called from within a filesystem's + * lookup method. + * Note that in case of error, nd->intent.open.file is destroyed, but the + * path information remains valid. + * If the open callback is set to NULL, then the standard f_op->open() + * filesystem callback is substituted. + */ +struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) +{ + if (IS_ERR(nd->intent.open.file)) + goto out; + if (IS_ERR(dentry)) + goto out_err; + nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), + nd->intent.open.flags - 1, + nd->intent.open.file, + open); +out: + return nd->intent.open.file; +out_err: + release_open_intent(nd); + nd->intent.open.file = (struct file *)dentry; + goto out; +} +EXPORT_SYMBOL_GPL(lookup_instantiate_filp); + +/** + * nameidata_to_filp - convert a nameidata to an open filp. + * @nd: pointer to nameidata + * @flags: open flags + * + * Note that this function destroys the original nameidata + */ +struct file *nameidata_to_filp(struct nameidata *nd, int flags) +{ + struct file *filp; + + /* Pick up the filp from the open intent */ + filp = nd->intent.open.file; + /* Has the filesystem initialised the file for us? */ + if (filp->f_dentry == NULL) + filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); + else + path_release(nd); + return filp; +} + struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { int error; @@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) if (f == NULL) return ERR_PTR(error); - return __dentry_open(dentry, mnt, flags, f); + return __dentry_open(dentry, mnt, flags, f, NULL); } EXPORT_SYMBOL(dentry_open); diff --git a/include/linux/namei.h b/include/linux/namei.h index 7db67b008cac..1c975d0d9e94 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -8,6 +8,7 @@ struct vfsmount; struct open_intent { int flags; int create_mode; + struct file *file; }; enum { MAX_NESTED_LINKS = 5 }; @@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); +extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); +extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); +extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); +extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); +extern void release_open_intent(struct nameidata *); + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -- cgit v1.2.3 From 02a913a73b52071e93f4b76db3e86138d19efffd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:17 -0700 Subject: NFSv4: Eliminate nfsv4 open race... Make NFSv4 return the fully initialized file pointer with the stateid that it created in the lookup w/intent. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 29 +++++----- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4_fs.h | 4 +- fs/nfs/nfs4proc.c | 137 ++++++++++++++++++++---------------------------- fs/nfs/proc.c | 2 +- include/linux/nfs_xdr.h | 2 +- 6 files changed, 74 insertions(+), 102 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c70eabd6d179..a6e251f21fd8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -914,7 +914,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -928,8 +927,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -943,18 +944,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) @@ -962,13 +963,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1012,7 +1009,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1135,7 +1132,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index edc95514046d..df80477c5af4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2215cdee43ae..8a3788199052 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -215,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 35da15342e05..c9ecb8119632 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -947,12 +948,26 @@ out: return status; } -struct inode * +static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +{ + struct file *filp; + + filp = lookup_instantiate_filp(nd, dentry, NULL); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + } else + nfs4_close_state(state, nd->intent.open.flags); +} + +struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; + struct dentry *res; if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -966,16 +981,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) - return (struct inode *)cred; + return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; - return state->inode; + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) + d_add(dentry, NULL); + return (struct dentry *)state; + } + res = d_add_unique(dentry, state->inode); + if (res != NULL) + dentry = res; + nfs4_intent_set_file(nd, dentry, state); + return res; } int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { struct rpc_cred *cred; struct nfs4_state *state; @@ -988,18 +1010,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); - if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) - return 1; - if (IS_ERR(state)) - return 0; + if (IS_ERR(state)) { + switch (PTR_ERR(state)) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + lookup_instantiate_filp(nd, (struct dentry *)state, NULL); + return 1; + case -ENOENT: + if (dentry->d_inode == NULL) + return 1; + } + goto out_drop; + } inode = state->inode; + iput(inode); if (inode == dentry->d_inode) { - iput(inode); + nfs4_intent_set_file(nd, dentry, state); return 1; } - d_drop(dentry); nfs4_close_state(state, openflags); - iput(inode); +out_drop: + d_drop(dentry); return 0; } @@ -1500,7 +1534,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata) static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs4_state *state; struct rpc_cred *cred; @@ -1522,13 +1556,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) { + if (status == 0) nfs_setattr_update_inode(state->inode, sattr); - goto out; - } - } else if (flags != 0) - goto out; - nfs4_close_state(state, flags); + } + if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + nfs4_intent_set_file(nd, dentry, state); + else + nfs4_close_state(state, flags); out: return status; } @@ -2175,65 +2209,6 @@ nfs4_proc_renew(struct nfs4_client *clp) return 0; } -/* - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this open method is prone to inefficiency and race conditions - * due to the lookup, potential create, and open VFS calls from sys_open() - * placed on the wire. - */ -static int -nfs4_proc_file_open(struct inode *inode, struct file *filp) -{ - struct dentry *dentry = filp->f_dentry; - struct nfs_open_context *ctx; - struct nfs4_state *state = NULL; - struct rpc_cred *cred; - int status = -ENOMEM; - - dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", - (int)dentry->d_parent->d_name.len, - dentry->d_parent->d_name.name, - (int)dentry->d_name.len, dentry->d_name.name); - - - /* Find our open stateid */ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(dentry, cred); - put_rpccred(cred); - if (unlikely(ctx == NULL)) - return -ENOMEM; - status = -EIO; /* ERACE actually */ - state = nfs4_find_state(inode, cred, filp->f_mode); - if (unlikely(state == NULL)) - goto no_state; - ctx->state = state; - nfs4_close_state(state, filp->f_mode); - ctx->mode = filp->f_mode; - nfs_file_set_open_context(filp, ctx); - put_nfs_open_context(ctx); - if (filp->f_mode & FMODE_WRITE) - nfs_begin_data_update(inode); - return 0; -no_state: - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - put_nfs_open_context(ctx); - return status; -} - -/* - * Release our state - */ -static int -nfs4_proc_file_release(struct inode *inode, struct file *filp) -{ - if (filp->f_mode & FMODE_WRITE) - nfs_end_data_update(inode); - nfs_file_clear_open_context(filp); - return 0; -} - static inline int nfs4_server_supports_acls(struct nfs_server *server) { return (server->caps & NFS_CAP_ACLS) @@ -3145,8 +3120,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, .commit_setup = nfs4_proc_commit_setup, - .file_open = nfs4_proc_file_open, - .file_release = nfs4_proc_file_release, + .file_open = nfs_open, + .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index be23c3fb9260..8fef86523d7f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 57efcc27f20b..60086dac11d5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -712,7 +712,7 @@ struct nfs_rpc_ops { int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); int (*create) (struct inode *, struct dentry *, - struct iattr *, int); + struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, struct dentry *, struct qstr *); -- cgit v1.2.3 From 6f926b5ba75c568296ec227e7d782db4ddbdca5c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:18 -0700 Subject: [NFS]: Check that the server returns a valid regular file to our OPEN request Since it appears that some servers don't... Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +++- fs/nfs/nfs4proc.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a6e251f21fd8..ac331d2d4c4a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -957,10 +957,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = NULL; goto out; /* This turned out not to be a regular file */ + case -EISDIR: + case -ENOTDIR: + goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; - /* case -EISDIR: */ /* case -EINVAL: */ default: goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c9ecb8119632..3db1c9f0b09c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -419,6 +419,22 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru o_arg->clientid = sp->so_client->cl_clientid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + if (status == 0) { + /* OPEN on anything except a regular file is disallowed in NFSv4 */ + switch (o_res->f_attr->mode & S_IFMT) { + case S_IFREG: + break; + case S_IFLNK: + status = -ELOOP; + break; + case S_IFDIR: + status = -EISDIR; + break; + default: + status = -ENOTDIR; + } + } + nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; -- cgit v1.2.3 From cdce5d6b94b6182f6d8a5b7b52923933e98cbc92 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:18 -0700 Subject: VFS: Make link_path_walk set LOOKUP_CONTINUE before calling permission(). This will allow nfs_permission() to perform additional optimizations when walking the path, by folding the ACCESS(MAY_EXEC) call on the directory into the lookup revalidation. Signed-off-by: Trond Myklebust --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0d1dff7d3d95..aaaa81036234 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -763,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) struct qstr this; unsigned int c; + nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode, nd); if (err == -EAGAIN) { err = permission(inode, MAY_EXEC, nd); @@ -815,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) if (err < 0) break; } - nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ err = do_lookup(nd, &this, &next); if (err) -- cgit v1.2.3 From cae7a073a4c5484cc5713eab606bf54b46724ab3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:19 -0700 Subject: NFSv4: Return delegation upon rename or removal of file. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 16 +++++++++++++++- fs/nfs/dir.c | 3 +++ fs/nfs/inode.c | 3 +-- 4 files changed, 20 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 4a36839f0bbd..44135af9894c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -142,7 +142,7 @@ static void nfs_msync_inode(struct inode *inode) /* * Basic procedure for returning a delegation to the server */ -int nfs_inode_return_delegation(struct inode *inode) +int __nfs_inode_return_delegation(struct inode *inode) { struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 3f6c45a29d6a..8017846b561f 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -25,7 +25,7 @@ struct nfs_delegation { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int nfs_inode_return_delegation(struct inode *inode); +int __nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); @@ -47,11 +47,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags) return 1; return 0; } + +static inline int nfs_inode_return_delegation(struct inode *inode) +{ + int err = 0; + + if (NFS_I(inode)->delegation != NULL) + err = __nfs_inode_return_delegation(inode); + return err; +} #else static inline int nfs_have_delegation(struct inode *inode, int flags) { return 0; } + +static inline int nfs_inode_return_delegation(struct inode *inode) +{ + return 0; +} #endif #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ac331d2d4c4a..72f50c0117b1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -801,6 +801,7 @@ static int nfs_dentry_delete(struct dentry *dentry) */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { + nfs_inode_return_delegation(inode); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { lock_kernel(); inode->i_nlink--; @@ -1329,6 +1330,7 @@ static int nfs_safe_remove(struct dentry *dentry) nfs_begin_data_update(dir); if (inode != NULL) { + nfs_inode_return_delegation(inode); nfs_begin_data_update(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ @@ -1547,6 +1549,7 @@ go_ahead: nfs_wb_all(old_inode); shrink_dcache_parent(old_dentry); } + nfs_inode_return_delegation(old_inode); if (new_inode) d_delete(new_dentry); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d7abaa00473a..6b0f7fe6bd1d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1671,8 +1671,7 @@ static void nfs4_clear_inode(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); /* If we are holding a delegation, return it! */ - if (nfsi->delegation != NULL) - nfs_inode_return_delegation(inode); + nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); /* Now clear out any remaining state */ -- cgit v1.2.3 From 642ac54923e0291ae2c8e42edde18d8c9c0a3c84 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:19 -0700 Subject: NFSv4: Return delegations in case we're changing ACLs Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++++ fs/nfs/nfs4proc.c | 1 + 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6b0f7fe6bd1d..65d5ab45ddc5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -853,6 +853,11 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) filemap_fdatawait(inode->i_mapping); nfs_wb_all(inode); } + /* + * Return any delegations if we're going to change ACLs + */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs_inode_return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3db1c9f0b09c..c10dcd12af51 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2405,6 +2405,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); if (ret == 0) -- cgit v1.2.3 From b8e5c4c2978fa666287525a9de2fa648a7581262 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:20 -0700 Subject: NFSv4: If a delegated open fails, ensure that we return the delegation Unless of course the open fails due to permission issues. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c10dcd12af51..a8be9af610ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -658,6 +658,8 @@ out_err: } up_read(&nfsi->rwsem); up_read(&clp->cl_sem); + if (err != -EACCES) + nfs_inode_return_delegation(inode); return err; } -- cgit v1.2.3 From 550f57470c6506f5ef3c708335dea6bd48bf3dc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:21 -0700 Subject: NFSv4: Ensure that we recover from the OPEN + OPEN_CONFIRM BAD_STATEID race If the server is in the unconfirmed OPEN state for a given open owner and receives a second OPEN for the same open owner, it will cancel the state of the first request and set up an OPEN_CONFIRM for the second. This can cause a race that is discussed in rfc3530 on page 181. The following patch allows the client to recover by retrying the original open request. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a8be9af610ac..ff378126ccd7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -785,6 +785,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, exception.retry = 1; continue; } + /* + * BAD_STATEID on OPEN means that the server cancelled our + * state before it received the OPEN_CONFIRM. + * Recover by retrying the request as per the discussion + * on Page 181 of RFC3530. + */ + if (status == -NFS4ERR_BAD_STATEID) { + exception.retry = 1; + continue; + } res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), status, &exception)); } while (exception.retry); -- cgit v1.2.3 From 4c780a4688b421baa896b59778c05d7e068e479f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:21 -0700 Subject: Fix Connectathon locking test failure We currently fail Connectathon test 6.10 in the case of 32-bit locks due to incorrect error checking. Also add support for l->l_len < 0 to 64-bit locks. Signed-off-by: Trond Myklebust --- fs/locks.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 7eb1d77b9204..a1e8b2248014 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ start += l->l_start; - end = start + l->l_len - 1; - if (l->l_len < 0) { + if (start < 0) + return -EINVAL; + fl->fl_end = OFFSET_MAX; + if (l->l_len > 0) { + end = start + l->l_len - 1; + fl->fl_end = end; + } else if (l->l_len < 0) { end = start - 1; + fl->fl_end = end; start += l->l_len; + if (start < 0) + return -EINVAL; } - - if (start < 0) - return -EINVAL; - if (l->l_len > 0 && end < 0) - return -EOVERFLOW; - fl->fl_start = start; /* we record the absolute position */ - fl->fl_end = end; - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; + if (fl->fl_end < fl->fl_start) + return -EOVERFLOW; fl->fl_owner = current->files; fl->fl_pid = current->tgid; @@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, return -EINVAL; } - if (((start += l->l_start) < 0) || (l->l_len < 0)) + start += l->l_start; + if (start < 0) return -EINVAL; - fl->fl_end = start + l->l_len - 1; - if (l->l_len > 0 && fl->fl_end < 0) - return -EOVERFLOW; + fl->fl_end = OFFSET_MAX; + if (l->l_len > 0) { + fl->fl_end = start + l->l_len - 1; + } else if (l->l_len < 0) { + fl->fl_end = start - 1; + start += l->l_len; + if (start < 0) + return -EINVAL; + } fl->fl_start = start; /* we record the absolute position */ - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; + if (fl->fl_end < fl->fl_start) + return -EOVERFLOW; fl->fl_owner = current->files; fl->fl_pid = current->tgid; -- cgit v1.2.3 From 6fe43f9e3701f7a9f2be151a5e6cfe94b87e92f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:22 -0700 Subject: NFS: Fix rename of directory onto empty directory If someone tries to rename a directory onto an empty directory, we currently fail and return EBUSY. This patch ensures that we try the rename if both source and target are directories, and that we fail with a correct error of EISDIR if the source is not a directory. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 72f50c0117b1..eb50c19fc253 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1511,9 +1511,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ if (!new_inode) goto go_ahead; - if (S_ISDIR(new_inode->i_mode)) - goto out; - else if (atomic_read(&new_dentry->d_count) > 2) { + if (S_ISDIR(new_inode->i_mode)) { + error = -EISDIR; + if (!S_ISDIR(old_inode->i_mode)) + goto out; + } else if (atomic_read(&new_dentry->d_count) > 2) { int err; /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, -- cgit v1.2.3