From 56463e50d1fc3f070492434cea6303b35ea000de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 17 Sep 2010 10:54:37 -0400 Subject: NFS: Use super.c for NFSROOT mount option parsing Replace duplicate code in NFSROOT for mounting an NFS server on '/' with logic that uses the existing mainline text-based logic in the NFS client. Add documenting comments where appropriate. Note that this means NFSROOT mounts now use the same default settings as v2/v3 mounts done via mount(2) from user space. vers=3,tcp,rsize=,wsize= As before, however, no version/protocol negotiation with the server is done. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..2a18f1582fa4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -364,6 +364,7 @@ extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ct extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx); extern u64 nfs_compat_user_ino64(u64 fileid); extern void nfs_fattr_init(struct nfs_fattr *fattr); +extern unsigned long nfs_inc_attr_generation_counter(void); extern struct nfs_fattr *nfs_alloc_fattr(void); @@ -379,9 +380,12 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh) kfree(fh); } +/* + * linux/fs/nfs/nfsroot.c + */ +extern int nfs_root_data(char **root_device, char **root_data); /*__init*/ /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ -extern unsigned long nfs_inc_attr_generation_counter(void); /* * linux/fs/nfs/file.c @@ -584,10 +588,6 @@ nfs_fileid_to_ino_t(u64 fileid) return ino; } -/* NFS root */ - -extern void * nfs_root_data(void); - #define nfs_wait_event(clnt, wq, condition) \ ({ \ int __retval = wait_event_killable(wq, condition); \ -- cgit v1.2.3 From 859d5024f450686ad0a42ed3c06f2fa20295c9e6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 17 Sep 2010 10:54:37 -0400 Subject: SUNRPC: Remove rpcb_getport_sync() Clean up: rpcb_getport_sync() has no more users, so remove it. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/rpcb_clnt.c | 51 --------------------------------------------- 2 files changed, 52 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 569dc722a600..a1a40f0c1856 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -137,7 +137,6 @@ int rpcb_register(u32, u32, int, unsigned short); int rpcb_v4_register(const u32 program, const u32 version, const struct sockaddr *address, const char *netid); -int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); void rpc_call_start(struct rpc_task *); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index dac219a56ae1..c961094ebc62 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -475,57 +475,6 @@ int rpcb_v4_register(const u32 program, const u32 version, return -EAFNOSUPPORT; } -/** - * rpcb_getport_sync - obtain the port for an RPC service on a given host - * @sin: address of remote peer - * @prog: RPC program number to bind - * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request - * - * Return value is the requested advertised port number, - * or a negative errno value. - * - * Called from outside the RPC client in a synchronous task context. - * Uses default timeout parameters specified by underlying transport. - * - * XXX: Needs to support IPv6 - */ -int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) -{ - struct rpcbind_args map = { - .r_prog = prog, - .r_vers = vers, - .r_prot = prot, - .r_port = 0, - }; - struct rpc_message msg = { - .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], - .rpc_argp = &map, - .rpc_resp = &map, - }; - struct rpc_clnt *rpcb_clnt; - int status; - - dprintk("RPC: %s(%pI4, %u, %u, %d)\n", - __func__, &sin->sin_addr.s_addr, prog, vers, prot); - - rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, RPCBVERS_2); - if (IS_ERR(rpcb_clnt)) - return PTR_ERR(rpcb_clnt); - - status = rpc_call_sync(rpcb_clnt, &msg, 0); - rpc_shutdown_client(rpcb_clnt); - - if (status >= 0) { - if (map.r_port != 0) - return map.r_port; - status = -EACCES; - } - return status; -} -EXPORT_SYMBOL_GPL(rpcb_getport_sync); - static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) { struct rpc_message msg = { -- cgit v1.2.3 From cd9a1c0e5ac681871d64804f82291649e2a0accb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Sep 2010 10:56:50 -0400 Subject: NFSv4: Clean up nfs4_atomic_open Start moving the 'struct nameidata' dependent code out of the lower level NFS code in preparation for the removal of open intents. Instead of the struct nameidata, we pass down a partially initialised struct nfs_open_context that will be fully initialised by the atomic open upon success. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/inode.c | 8 +++--- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 40 ++++++++------------------ include/linux/nfs_fs.h | 2 ++ 5 files changed, 93 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e257172d438c..17529b5bc551 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -38,6 +38,7 @@ #include "delegation.h" #include "iostat.h" #include "internal.h" +#include "fscache.h" /* #define NFS_DEBUG_VERBOSE 1 */ @@ -1029,9 +1030,61 @@ static int is_atomic_open(struct nameidata *nd) return 1; } +static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) +{ + struct path path = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + struct nfs_open_context *ctx; + struct rpc_cred *cred; + fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return ERR_CAST(cred); + ctx = alloc_nfs_open_context(&path, cred, fmode); + put_rpccred(cred); + if (ctx == NULL) + return ERR_PTR(-ENOMEM); + return ctx; +} + +static int do_open(struct inode *inode, struct file *filp) +{ + nfs_fscache_set_inode_cookie(inode, filp); + return 0; +} + +static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +{ + struct file *filp; + int ret = 0; + + /* If the open_intent is for execute, we have an extra check to make */ + if (ctx->mode & FMODE_EXEC) { + ret = nfs_may_open(ctx->path.dentry->d_inode, + ctx->cred, + nd->intent.open.flags); + if (ret < 0) + goto out; + } + filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); + if (IS_ERR(filp)) + ret = PTR_ERR(filp); + else + nfs_file_set_open_context(filp, ctx); +out: + put_nfs_open_context(ctx); + return ret; +} + static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct nfs_open_context *ctx; + struct iattr attr; struct dentry *res = NULL; + int open_flags; int error; dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", @@ -1054,9 +1107,27 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry goto out; } + ctx = nameidata_to_nfs_open_context(dentry, nd); + res = ERR_CAST(ctx); + if (IS_ERR(ctx)) + goto out; + + open_flags = nd->intent.open.flags; + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current_umask(); + } else { + open_flags &= ~O_EXCL; + attr.ia_valid = 0; + BUG_ON(open_flags & O_CREAT); + } + /* Open the file on the server */ - res = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, ctx, open_flags, &attr); if (IS_ERR(res)) { + put_nfs_open_context(ctx); error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ @@ -1074,8 +1145,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry default: goto out; } - } else if (res != NULL) + } + if (res != NULL) dentry = res; + nfs_intent_set_file(nd, ctx); out: return res; no_open: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7d2d6c72aa78..2f9266406afc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -623,7 +623,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) +struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) { struct nfs_open_context *ctx; @@ -633,6 +633,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct path_get(&ctx->path); ctx->cred = get_rpccred(cred); ctx->state = NULL; + ctx->mode = f_mode; ctx->flags = 0; ctx->error = 0; ctx->dir_cookie = 0; @@ -673,7 +674,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { struct inode *inode = filp->f_path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -730,11 +731,10 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(&filp->f_path, cred); + ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; - ctx->mode = filp->f_mode; nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); nfs_fscache_set_inode_cookie(inode, filp); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 311e15cc8af0..c5cc2a6aceb0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -242,7 +242,7 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); -extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *nfs4_atomic_open(struct inode *, struct nfs_open_context *, int, struct iattr *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 089da5b5d20a..38c3bed2240d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2025,39 +2025,17 @@ out_close: } struct dentry * -nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) { - struct path path = { - .mnt = nd->path.mnt, - .dentry = dentry, - }; + struct dentry *dentry = ctx->path.dentry; struct dentry *parent; - struct iattr attr; - struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; - int open_flags = nd->intent.open.flags; - fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); - - if (nd->flags & LOOKUP_CREATE) { - attr.ia_mode = nd->intent.open.create_mode; - attr.ia_valid = ATTR_MODE; - if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current_umask(); - } else { - open_flags &= ~O_EXCL; - attr.ia_valid = 0; - BUG_ON(open_flags & O_CREAT); - } - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return (struct dentry *)cred; parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); - put_rpccred(cred); + state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { d_add(dentry, NULL); @@ -2067,11 +2045,15 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return (struct dentry *)state; } res = d_add_unique(dentry, igrab(state->inode)); - if (res != NULL) - path.dentry = res; - nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); + if (res != NULL) { + struct dentry *dummy = ctx->path.dentry; + + ctx->path.dentry = dget(res); + dput(dummy); + } + ctx->state = state; + nfs_set_verifier(ctx->path.dentry, nfs_save_change_attribute(dir)); nfs_unblock_sillyrename(parent); - nfs4_intent_set_file(nd, &path, state, fmode); return res; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 2a18f1582fa4..61c89b4ad7c6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -360,6 +360,8 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode); +extern struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode); +extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx); extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx); extern u64 nfs_compat_user_ino64(u64 fileid); -- cgit v1.2.3 From c0204fd2b8fe047b18b67e07e1bf2a03691240cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Sep 2010 10:56:51 -0400 Subject: NFS: Clean up nfs4_proc_create() Remove all remaining references to the struct nameidata from the low level NFS layers. Again pass down a partially initialised struct nfs_open_context when we want to do atomic open+create. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 47 +++++++++++++++++++++++++++++++++++------ fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 56 +++++++++++++------------------------------------ fs/nfs/proc.c | 2 +- include/linux/nfs_xdr.h | 2 +- 5 files changed, 58 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dc93d356341b..e37ffddd79c2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -105,8 +105,9 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_create, + .create = nfs_open_create, .lookup = nfs_atomic_lookup, .link = nfs_link, .unlink = nfs_unlink, @@ -1239,6 +1240,44 @@ no_open_dput: no_open: return nfs_lookup_revalidate(dentry, nd); } + +static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct nfs_open_context *ctx = NULL; + struct iattr attr; + int error; + int open_flags = 0; + + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + + if ((nd->flags & LOOKUP_CREATE) != 0) { + open_flags = nd->intent.open.flags; + + ctx = nameidata_to_nfs_open_context(dentry, nd); + error = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err; + } + + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); + if (error != 0) + goto out_put_ctx; + if (ctx != NULL) + nfs_intent_set_file(nd, ctx); + return 0; +out_put_ctx: + if (ctx != NULL) + put_nfs_open_context(ctx); +out_err: + d_drop(dentry); + return error; +} + #endif /* CONFIG_NFSV4 */ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) @@ -1369,7 +1408,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, { struct iattr attr; int error; - int open_flags = 0; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1377,10 +1415,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if ((nd->flags & LOOKUP_CREATE) != 0) - open_flags = nd->intent.open.flags; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL); if (error != 0) goto out_err; return 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index fabb4f2849a1..2be4a7f59f1c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nameidata *nd) + int flags, struct nfs_open_context *ctx) { struct nfs3_createdata *data; mode_t mode = sattr->ia_mode; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 83c5ef6e7cef..617b149ee16d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1998,32 +1998,6 @@ out: return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) -{ - struct file *filp; - int ret; - - /* If the open_intent is for execute, we have an extra check to make */ - if (fmode & FMODE_EXEC) { - ret = nfs_may_open(state->inode, - state->owner->so_cred, - nd->intent.open.flags); - if (ret < 0) - goto out_close; - } - filp = lookup_instantiate_filp(nd, path->dentry, NULL); - if (!IS_ERR(filp)) { - struct nfs_open_context *ctx; - ctx = nfs_file_open_context(filp); - ctx->state = state; - return 0; - } - ret = PTR_ERR(filp); -out_close: - nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE)); - return ret; -} - struct inode * nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) { @@ -2491,36 +2465,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nameidata *nd) + int flags, struct nfs_open_context *ctx) { - struct path path = { - .mnt = nd->path.mnt, + struct path my_path = { .dentry = dentry, }; + struct path *path = &my_path; struct nfs4_state *state; - struct rpc_cred *cred; - fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); + struct rpc_cred *cred = NULL; + fmode_t fmode = 0; int status = 0; - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) { - status = PTR_ERR(cred); - goto out; + if (ctx != NULL) { + cred = ctx->cred; + path = &ctx->path; + fmode = ctx->mode; } - state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); + state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); - goto out_putcred; + goto out; } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) - status = nfs4_intent_set_file(nd, &path, state, fmode); + if (ctx != NULL) + ctx->state = state; else - nfs4_close_sync(&path, state, fmode); -out_putcred: - put_rpccred(cred); + nfs4_close_sync(path, state, fmode); out: return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 611bec22f552..4ef39ae88ea5 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nameidata *nd) + int flags, struct nfs_open_context *ctx) { struct nfs_createdata *data; struct rpc_message msg = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index fc461926c412..b1484dad7bef 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1032,7 +1032,7 @@ struct nfs_rpc_ops { int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*create) (struct inode *, struct dentry *, - struct iattr *, int, struct nameidata *); + struct iattr *, int, struct nfs_open_context *); int (*remove) (struct inode *, struct qstr *); void (*unlink_setup) (struct rpc_message *, struct inode *dir); int (*unlink_done) (struct rpc_task *, struct inode *); -- cgit v1.2.3 From 2b484297e48c3fbb1846fc6ea10036d9465273e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Sep 2010 10:56:51 -0400 Subject: NFS: Add an 'open_context' element to struct nfs_rpc_ops Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 7 ++++--- fs/nfs/nfs4_fs.h | 1 - fs/nfs/nfs4proc.c | 3 ++- include/linux/nfs_xdr.h | 4 ++++ 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e37ffddd79c2..1e9e18813ad7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,7 +34,6 @@ #include #include -#include "nfs4_fs.h" #include "delegation.h" #include "iostat.h" #include "internal.h" @@ -1127,7 +1126,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry /* Open the file on the server */ nfs_block_sillyrename(dentry->d_parent); - inode = nfs4_atomic_open(dir, ctx, open_flags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); if (IS_ERR(inode)) { nfs_unblock_sillyrename(dentry->d_parent); put_nfs_open_context(ctx); @@ -1175,8 +1174,10 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; + parent = dget_parent(dentry); dir = parent->d_inode; + /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ @@ -1205,7 +1206,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - inode = nfs4_atomic_open(dir, ctx, openflags, NULL); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); if (IS_ERR(inode)) { ret = PTR_ERR(inode); switch (ret) { diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6cf12ba9f4e7..d24a8e07b5e2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -242,7 +242,6 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); -extern struct inode *nfs4_atomic_open(struct inode *, struct nfs_open_context *, int, struct iattr *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 617b149ee16d..643abd26f2de 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1998,7 +1998,7 @@ out: return status; } -struct inode * +static struct inode * nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) { struct nfs4_state *state; @@ -5358,6 +5358,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, .close_context = nfs4_close_context, + .open_context = nfs4_atomic_open, }; /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b1484dad7bef..6f345f8af4ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1065,6 +1065,10 @@ struct nfs_rpc_ops { int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); void (*close_context)(struct nfs_open_context *ctx, int); + struct inode * (*open_context) (struct inode *dir, + struct nfs_open_context *ctx, + int open_flags, + struct iattr *iattr); }; /* -- cgit v1.2.3 From 920769f031a8aff87b66bdf49d1a0d0988241ef9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 Sep 2010 17:30:25 -0400 Subject: nfs: standardize the rename args container Each NFS version has its own version of the rename args container. Standardize them on a common one that's identical to the one NFSv4 uses. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 8 ++++---- fs/nfs/nfs3proc.c | 12 +++++------- fs/nfs/nfs3xdr.c | 10 +++++----- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- fs/nfs/proc.c | 10 ++++------ include/linux/nfs_xdr.h | 39 ++++++++++++--------------------------- 7 files changed, 32 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index db8846a0e82e..e15bc0306d0c 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args) static int nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) { - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_array(p, args->toname, args->tolen); + p = xdr_encode_fhandle(p, args->old_dir); + p = xdr_encode_array(p, args->old_name->name, args->old_name->len); + p = xdr_encode_fhandle(p, args->new_dir); + p = xdr_encode_array(p, args->new_name->name, args->new_name->len); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2be4a7f59f1c..6dc4ef66f074 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -442,13 +442,11 @@ static int nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { - struct nfs3_renameargs arg = { - .fromfh = NFS_FH(old_dir), - .fromname = old_name->name, - .fromlen = old_name->len, - .tofh = NFS_FH(new_dir), - .toname = new_name->name, - .tolen = new_name->len + struct nfs_renameargs arg = { + .old_dir = NFS_FH(old_dir), + .old_name = old_name, + .new_dir = NFS_FH(new_dir), + .new_name = new_name, }; struct nfs3_renameres res; struct rpc_message msg = { diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9769704f8ce6..f385759eb35b 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -442,12 +442,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args) * Encode RENAME arguments */ static int -nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) +nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) { - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_array(p, args->toname, args->tolen); + p = xdr_encode_fhandle(p, args->old_dir); + p = xdr_encode_array(p, args->old_name->name, args->old_name->len); + p = xdr_encode_fhandle(p, args->new_dir); + p = xdr_encode_array(p, args->new_name->name, args->new_name->len); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 643abd26f2de..3aa13c1b8dd8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2570,7 +2570,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { struct nfs_server *server = NFS_SERVER(old_dir); - struct nfs4_rename_arg arg = { + struct nfs_renameargs arg = { .old_dir = NFS_FH(old_dir), .new_dir = NFS_FH(new_dir), .old_name = old_name, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 08ef91291132..7a098ae07a1b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1823,7 +1823,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs /* * Encode RENAME request */ -static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) +static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args) { struct xdr_stream xdr; struct compound_hdr hdr = { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4ef39ae88ea5..0aff10c3bbce 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -370,12 +370,10 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { struct nfs_renameargs arg = { - .fromfh = NFS_FH(old_dir), - .fromname = old_name->name, - .fromlen = old_name->len, - .tofh = NFS_FH(new_dir), - .toname = new_name->name, - .tolen = new_name->len + .old_dir = NFS_FH(old_dir), + .old_name = old_name, + .new_dir = NFS_FH(new_dir), + .new_name = new_name, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_RENAME], diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6f345f8af4ae..acb95fb27bcc 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -399,6 +399,18 @@ struct nfs_removeres { struct nfs4_sequence_res seq_res; }; +/* + * Common arguments to the rename call + */ +struct nfs_renameargs { + const struct nfs_fh *old_dir; + const struct nfs_fh *new_dir; + const struct qstr *old_name; + const struct qstr *new_name; + const u32 *bitmask; + struct nfs4_sequence_args seq_args; +}; + /* * Argument struct for decode_entry function */ @@ -434,15 +446,6 @@ struct nfs_createargs { struct iattr * sattr; }; -struct nfs_renameargs { - struct nfs_fh * fromfh; - const char * fromname; - unsigned int fromlen; - struct nfs_fh * tofh; - const char * toname; - unsigned int tolen; -}; - struct nfs_setattrargs { struct nfs_fh * fh; nfs4_stateid stateid; @@ -586,15 +589,6 @@ struct nfs3_mknodargs { dev_t rdev; }; -struct nfs3_renameargs { - struct nfs_fh * fromfh; - const char * fromname; - unsigned int fromlen; - struct nfs_fh * tofh; - const char * toname; - unsigned int tolen; -}; - struct nfs3_linkargs { struct nfs_fh * fromfh; struct nfs_fh * tofh; @@ -801,15 +795,6 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -struct nfs4_rename_arg { - const struct nfs_fh * old_dir; - const struct nfs_fh * new_dir; - const struct qstr * old_name; - const struct qstr * new_name; - const u32 * bitmask; - struct nfs4_sequence_args seq_args; -}; - struct nfs4_rename_res { const struct nfs_server * server; struct nfs4_change_info old_cinfo; -- cgit v1.2.3 From e8582a8b96f329083b4da29aa87bc43cc0d80dd1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 Sep 2010 17:31:06 -0400 Subject: nfs: standardize the rename response container Right now, v3 and v4 have their own variants. Create a standard struct that will work for v3 and v4. v2 doesn't get anything but a simple error and so isn't affected by this. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 16 ++++++++-------- fs/nfs/nfs3xdr.c | 6 +++--- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 23 +++++++++-------------- 5 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 6dc4ef66f074..bb41d88e1567 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -448,7 +448,7 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, .new_dir = NFS_FH(new_dir), .new_name = new_name, }; - struct nfs3_renameres res; + struct nfs_renameres res; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], .rpc_argp = &arg, @@ -458,17 +458,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - res.fromattr = nfs_alloc_fattr(); - res.toattr = nfs_alloc_fattr(); - if (res.fromattr == NULL || res.toattr == NULL) + res.old_fattr = nfs_alloc_fattr(); + res.new_fattr = nfs_alloc_fattr(); + if (res.old_fattr == NULL || res.new_fattr == NULL) goto out; status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); - nfs_post_op_update_inode(old_dir, res.fromattr); - nfs_post_op_update_inode(new_dir, res.toattr); + nfs_post_op_update_inode(old_dir, res.old_fattr); + nfs_post_op_update_inode(new_dir, res.new_fattr); out: - nfs_free_fattr(res.toattr); - nfs_free_fattr(res.fromattr); + nfs_free_fattr(res.old_fattr); + nfs_free_fattr(res.new_fattr); dprintk("NFS reply rename: %d\n", status); return status; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index f385759eb35b..89c40f8ec6e6 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -970,14 +970,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) * Decode RENAME reply */ static int -nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) +nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res) { int status; if ((status = ntohl(*p++)) != 0) status = nfs_stat_to_errno(status); - p = xdr_decode_wcc_data(p, res->fromattr); - p = xdr_decode_wcc_data(p, res->toattr); + p = xdr_decode_wcc_data(p, res->old_fattr); + p = xdr_decode_wcc_data(p, res->new_fattr); return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3aa13c1b8dd8..a3c21cc4677b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2577,7 +2577,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, .new_name = new_name, .bitmask = server->attr_bitmask, }; - struct nfs4_rename_res res = { + struct nfs_renameres res = { .server = server, }; struct rpc_message msg = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7a098ae07a1b..b0bd7efe8100 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4873,7 +4873,7 @@ out: /* * Decode RENAME response */ -static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) +static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res) { struct xdr_stream xdr; struct compound_hdr hdr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index acb95fb27bcc..9ad132e13d12 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -411,6 +411,15 @@ struct nfs_renameargs { struct nfs4_sequence_args seq_args; }; +struct nfs_renameres { + const struct nfs_server *server; + struct nfs4_change_info old_cinfo; + struct nfs_fattr *old_fattr; + struct nfs4_change_info new_cinfo; + struct nfs_fattr *new_fattr; + struct nfs4_sequence_res seq_res; +}; + /* * Argument struct for decode_entry function */ @@ -623,11 +632,6 @@ struct nfs3_readlinkargs { struct page ** pages; }; -struct nfs3_renameres { - struct nfs_fattr * fromattr; - struct nfs_fattr * toattr; -}; - struct nfs3_linkres { struct nfs_fattr * dir_attr; struct nfs_fattr * fattr; @@ -795,15 +799,6 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -struct nfs4_rename_res { - const struct nfs_server * server; - struct nfs4_change_info old_cinfo; - struct nfs_fattr * old_fattr; - struct nfs4_change_info new_cinfo; - struct nfs_fattr * new_fattr; - struct nfs4_sequence_res seq_res; -}; - #define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; -- cgit v1.2.3 From 779c51795bfb35c2403c924b9de90ca9356bc693 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 Sep 2010 17:31:30 -0400 Subject: nfs: move nfs_sillyrename to unlink.c ...since that's where most of the sillyrenaming code lives. A comment block is added to the beginning as well to clarify how sillyrenaming works. Also, make nfs_async_unlink static as nfs_sillyrename is the only caller. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 70 ----------------------------------------- fs/nfs/unlink.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/nfs_fs.h | 2 +- 3 files changed, 85 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1e9e18813ad7..86f1d41c6078 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1498,76 +1498,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } -static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) -{ - static unsigned int sillycounter; - const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; - const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs")+fileidsize+countersize-1; - char silly[slen+1]; - struct qstr qsilly; - struct dentry *sdentry; - int error = -EIO; - - dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); - nfs_inc_stats(dir, NFSIOS_SILLYRENAME); - - /* - * We don't allow a dentry to be silly-renamed twice. - */ - error = -EBUSY; - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) - goto out; - - sprintf(silly, ".nfs%*.*Lx", - fileidsize, fileidsize, - (unsigned long long)NFS_FILEID(dentry->d_inode)); - - /* Return delegation in anticipation of the rename */ - nfs_inode_return_delegation(dentry->d_inode); - - sdentry = NULL; - do { - char *suffix = silly + slen - countersize; - - dput(sdentry); - sillycounter++; - sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - - dfprintk(VFS, "NFS: trying to rename %s to %s\n", - dentry->d_name.name, silly); - - sdentry = lookup_one_len(silly, dentry->d_parent, slen); - /* - * N.B. Better to return EBUSY here ... it could be - * dangerous to delete the file while it's in use. - */ - if (IS_ERR(sdentry)) - goto out; - } while(sdentry->d_inode != NULL); /* need negative lookup */ - - qsilly.name = silly; - qsilly.len = strlen(silly); - if (dentry->d_inode) { - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, - dir, &qsilly); - nfs_mark_for_revalidate(dentry->d_inode); - } else - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, - dir, &qsilly); - if (!error) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - d_move(dentry, sdentry); - error = nfs_async_unlink(dir, dentry); - /* If we return 0 we don't unlink */ - } - dput(sdentry); -out: - return error; -} - /* * Remove a file after making sure there are no pending writes, * and after checking that the file has only one user. diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 2f84adaad427..c3ce865294f1 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -13,9 +13,12 @@ #include #include #include +#include #include "internal.h" #include "nfs4_fs.h" +#include "iostat.h" +#include "delegation.h" struct nfs_unlinkdata { struct hlist_node list; @@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry) * @dir: parent directory of dentry * @dentry: dentry to unlink */ -int +static int nfs_async_unlink(struct inode *dir, struct dentry *dentry) { struct nfs_unlinkdata *data; @@ -303,3 +306,83 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) nfs_free_unlinkdata(data); } + +/** + * nfs_sillyrename - Perform a silly-rename of a dentry + * @dir: inode of directory that contains dentry + * @dentry: dentry to be sillyrenamed + * + * NFSv2/3 is stateless and the server doesn't know when the client is + * holding a file open. To prevent application problems when a file is + * unlinked while it's still open, the client performs a "silly-rename". + * That is, it renames the file to a hidden file in the same directory, + * and only performs the unlink once the last reference to it is put. + * + * The final cleanup is done during dentry_iput. + */ +int +nfs_sillyrename(struct inode *dir, struct dentry *dentry) +{ + static unsigned int sillycounter; + const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; + const int countersize = sizeof(sillycounter)*2; + const int slen = sizeof(".nfs")+fileidsize+countersize-1; + char silly[slen+1]; + struct qstr qsilly; + struct dentry *sdentry; + int error = -EIO; + + dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); + + /* + * We don't allow a dentry to be silly-renamed twice. + */ + error = -EBUSY; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto out; + + sprintf(silly, ".nfs%*.*Lx", + fileidsize, fileidsize, + (unsigned long long)NFS_FILEID(dentry->d_inode)); + + /* Return delegation in anticipation of the rename */ + nfs_inode_return_delegation(dentry->d_inode); + + sdentry = NULL; + do { + char *suffix = silly + slen - countersize; + + dput(sdentry); + sillycounter++; + sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); + + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); + + sdentry = lookup_one_len(silly, dentry->d_parent, slen); + /* + * N.B. Better to return EBUSY here ... it could be + * dangerous to delete the file while it's in use. + */ + if (IS_ERR(sdentry)) + goto out; + } while (sdentry->d_inode != NULL); /* need negative lookup */ + + qsilly.name = silly; + qsilly.len = strlen(silly); + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); + if (dentry->d_inode) + nfs_mark_for_revalidate(dentry->d_inode); + if (!error) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_move(dentry, sdentry); + error = nfs_async_unlink(dir, dentry); + /* If we return 0 we don't unlink */ + } + dput(sdentry); +out: + return error; +} diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 61c89b4ad7c6..d929b1883644 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -485,10 +485,10 @@ extern void nfs_release_automount_timer(void); /* * linux/fs/nfs/unlink.c */ -extern int nfs_async_unlink(struct inode *dir, struct dentry *dentry); extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); extern void nfs_block_sillyrename(struct dentry *dentry); extern void nfs_unblock_sillyrename(struct dentry *dentry); +extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* * linux/fs/nfs/write.c -- cgit v1.2.3 From d3d4152a5d59af9e13a73efa9e9c24383fbe307f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 Sep 2010 17:31:57 -0400 Subject: nfs: make sillyrename an async operation A synchronous rename can be interrupted by a SIGKILL. If that happens during a sillyrename operation, it's possible for the rename call to be sent to the server, but the task exits before processing the reply. If this happens, the sillyrenamed file won't get cleaned up during nfs_dentry_iput and the server is left with a dangling .nfs* file hanging around. Fix this problem by turning sillyrename into an asynchronous operation and have the task doing the sillyrename just wait on the reply. If the task is killed before the sillyrename completes, it'll still proceed to completion. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 23 ++++++ fs/nfs/nfs4proc.c | 30 ++++++++ fs/nfs/proc.c | 19 +++++ fs/nfs/unlink.c | 200 +++++++++++++++++++++++++++++++++++++++++++++--- include/linux/nfs_xdr.h | 2 + 5 files changed, 263 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index bb41d88e1567..4e9d941ab548 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -438,6 +438,27 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } +static void +nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +{ + msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; +} + +static int +nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, + struct inode *new_dir) +{ + struct nfs_renameres *res; + + if (nfs3_async_handle_jukebox(task, old_dir)) + return 0; + res = task->tk_msg.rpc_resp; + + nfs_post_op_update_inode(old_dir, res->old_fattr); + nfs_post_op_update_inode(new_dir, res->new_fattr); + return 1; +} + static int nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) @@ -842,6 +863,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .unlink_setup = nfs3_proc_unlink_setup, .unlink_done = nfs3_proc_unlink_done, .rename = nfs3_proc_rename, + .rename_setup = nfs3_proc_rename_setup, + .rename_done = nfs3_proc_rename_done, .link = nfs3_proc_link, .symlink = nfs3_proc_symlink, .mkdir = nfs3_proc_mkdir, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a3c21cc4677b..c46e45e9b33f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2566,6 +2566,34 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } +static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_renameargs *arg = msg->rpc_argp; + struct nfs_renameres *res = msg->rpc_resp; + + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; + arg->bitmask = server->attr_bitmask; + res->server = server; +} + +static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, + struct inode *new_dir) +{ + struct nfs_renameres *res = task->tk_msg.rpc_resp; + + if (!nfs4_sequence_done(task, &res->seq_res)) + return 0; + if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) + return 0; + + update_changeattr(old_dir, &res->old_cinfo); + nfs_post_op_update_inode(old_dir, res->old_fattr); + update_changeattr(new_dir, &res->new_cinfo); + nfs_post_op_update_inode(new_dir, res->new_fattr); + return 1; +} + static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { @@ -5338,6 +5366,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .unlink_setup = nfs4_proc_unlink_setup, .unlink_done = nfs4_proc_unlink_done, .rename = nfs4_proc_rename, + .rename_setup = nfs4_proc_rename_setup, + .rename_done = nfs4_proc_rename_done, .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, .mkdir = nfs4_proc_mkdir, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0aff10c3bbce..e5e84aa2af17 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -365,6 +365,23 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } +static void +nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +{ + msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; +} + +static int +nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, + struct inode *new_dir) +{ + if (nfs_async_handle_expired_key(task)) + return 0; + nfs_mark_for_revalidate(old_dir); + nfs_mark_for_revalidate(new_dir); + return 1; +} + static int nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) @@ -703,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .unlink_setup = nfs_proc_unlink_setup, .unlink_done = nfs_proc_unlink_done, .rename = nfs_proc_rename, + .rename_setup = nfs_proc_rename_setup, + .rename_done = nfs_proc_rename_done, .link = nfs_proc_link, .symlink = nfs_proc_symlink, .mkdir = nfs_proc_mkdir, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index c3ce865294f1..698b3e6367ff 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -307,6 +307,174 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) nfs_free_unlinkdata(data); } +/* Cancel a queued async unlink. Called when a sillyrename run fails. */ +static void +nfs_cancel_async_unlink(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + struct nfs_unlinkdata *data = dentry->d_fsdata; + + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + spin_unlock(&dentry->d_lock); + nfs_free_unlinkdata(data); + return; + } + spin_unlock(&dentry->d_lock); +} + +struct nfs_renamedata { + struct nfs_renameargs args; + struct nfs_renameres res; + struct rpc_cred *cred; + struct inode *old_dir; + struct dentry *old_dentry; + struct nfs_fattr old_fattr; + struct inode *new_dir; + struct dentry *new_dentry; + struct nfs_fattr new_fattr; +}; + +/** + * nfs_async_rename_done - Sillyrename post-processing + * @task: rpc_task of the sillyrename + * @calldata: nfs_renamedata for the sillyrename + * + * Do the directory attribute updates and the d_move + */ +static void nfs_async_rename_done(struct rpc_task *task, void *calldata) +{ + struct nfs_renamedata *data = calldata; + struct inode *old_dir = data->old_dir; + struct inode *new_dir = data->new_dir; + + if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { + nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); + return; + } + + if (task->tk_status != 0) { + nfs_cancel_async_unlink(data->old_dentry); + return; + } + + nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); + d_move(data->old_dentry, data->new_dentry); +} + +/** + * nfs_async_rename_release - Release the sillyrename data. + * @calldata: the struct nfs_renamedata to be released + */ +static void nfs_async_rename_release(void *calldata) +{ + struct nfs_renamedata *data = calldata; + struct super_block *sb = data->old_dir->i_sb; + + if (data->old_dentry->d_inode) + nfs_mark_for_revalidate(data->old_dentry->d_inode); + + dput(data->old_dentry); + dput(data->new_dentry); + iput(data->old_dir); + iput(data->new_dir); + nfs_sb_deactive(sb); + put_rpccred(data->cred); + kfree(data); +} + +#if defined(CONFIG_NFS_V4_1) +static void nfs_rename_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_renamedata *data = calldata; + struct nfs_server *server = NFS_SERVER(data->old_dir); + + if (nfs4_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + +static const struct rpc_call_ops nfs_rename_ops = { + .rpc_call_done = nfs_async_rename_done, + .rpc_release = nfs_async_rename_release, +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_rename_prepare, +#endif /* CONFIG_NFS_V4_1 */ +}; + +/** + * nfs_async_rename - perform an asynchronous rename operation + * @old_dir: directory that currently holds the dentry to be renamed + * @new_dir: target directory for the rename + * @old_dentry: original dentry to be renamed + * @new_dentry: dentry to which the old_dentry should be renamed + * + * It's expected that valid references to the dentries and inodes are held + */ +static struct rpc_task * +nfs_async_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, struct dentry *new_dentry) +{ + struct nfs_renamedata *data; + struct rpc_message msg = { }; + struct rpc_task_setup task_setup_data = { + .rpc_message = &msg, + .callback_ops = &nfs_rename_ops, + .workqueue = nfsiod_workqueue, + .rpc_client = NFS_CLIENT(old_dir), + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return ERR_PTR(-ENOMEM); + task_setup_data.callback_data = data, + + data->cred = rpc_lookup_cred(); + if (IS_ERR(data->cred)) { + task = (struct rpc_task *)data->cred; + kfree(data); + return task; + } + + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + msg.rpc_cred = data->cred; + + /* set up nfs_renamedata */ + data->old_dir = old_dir; + atomic_inc(&old_dir->i_count); + data->new_dir = new_dir; + atomic_inc(&new_dir->i_count); + data->old_dentry = dget(old_dentry); + data->new_dentry = dget(new_dentry); + nfs_fattr_init(&data->old_fattr); + nfs_fattr_init(&data->new_fattr); + + /* set up nfs_renameargs */ + data->args.old_dir = NFS_FH(old_dir); + data->args.old_name = &old_dentry->d_name; + data->args.new_dir = NFS_FH(new_dir); + data->args.new_name = &new_dentry->d_name; + + /* set up nfs_renameres */ + data->res.old_fattr = &data->old_fattr; + data->res.new_fattr = &data->new_fattr; + + nfs_sb_active(old_dir->i_sb); + + NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + nfs_async_rename_release(data); + + return task; +} + /** * nfs_sillyrename - Perform a silly-rename of a dentry * @dir: inode of directory that contains dentry @@ -328,8 +496,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) const int countersize = sizeof(sillycounter)*2; const int slen = sizeof(".nfs")+fileidsize+countersize-1; char silly[slen+1]; - struct qstr qsilly; struct dentry *sdentry; + struct rpc_task *task; int error = -EIO; dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", @@ -371,17 +539,27 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) goto out; } while (sdentry->d_inode != NULL); /* need negative lookup */ - qsilly.name = silly; - qsilly.len = strlen(silly); - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); - if (dentry->d_inode) - nfs_mark_for_revalidate(dentry->d_inode); - if (!error) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - d_move(dentry, sdentry); - error = nfs_async_unlink(dir, dentry); - /* If we return 0 we don't unlink */ + /* queue unlink first. Can't do this from rpc_release as it + * has to allocate memory + */ + error = nfs_async_unlink(dir, dentry); + if (error) + goto out_dput; + + /* run the rename task, undo unlink if it fails */ + task = nfs_async_rename(dir, dir, dentry, sdentry); + if (IS_ERR(task)) { + error = -EBUSY; + nfs_cancel_async_unlink(dentry); + goto out_dput; } + + /* wait for the RPC task to complete, unless a SIGKILL intervenes */ + error = rpc_wait_for_completion_task(task); + if (error == 0) + error = task->tk_status; + rpc_put_task(task); +out_dput: dput(sdentry); out: return error; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9ad132e13d12..172df83ac54b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1018,6 +1018,8 @@ struct nfs_rpc_ops { int (*unlink_done) (struct rpc_task *, struct inode *); int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); + void (*rename_setup) (struct rpc_message *msg, struct inode *dir); + int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, struct qstr *); int (*symlink) (struct inode *, struct dentry *, struct page *, unsigned int, struct iattr *); -- cgit v1.2.3 From b4687da7fc5f741af7fee9b0248a2cf2ad9c4478 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 21 Sep 2010 16:55:48 -0400 Subject: SUNRPC: Refactor logic to NUL-terminate strings in pages Clean up: Introduce a helper to '\0'-terminate XDR strings that are placed in a page in the page cache. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 6 +----- fs/nfs/nfs3xdr.c | 6 +----- fs/nfs/nfs4xdr.c | 5 +---- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 17 +++++++++++++++++ 5 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index e15bc0306d0c..79c74387a2fe 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -596,7 +596,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) struct kvec *iov = rcvbuf->head; size_t hdrlen; u32 len, recvd; - char *kaddr; int status; if ((status = ntohl(*p++))) @@ -623,10 +622,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) return -EIO; } - /* NULL terminate the string we got */ - kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); - kaddr[len+rcvbuf->page_base] = '\0'; - kunmap_atomic(kaddr, KM_USER0); + xdr_terminate_string(rcvbuf, len); return 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 89c40f8ec6e6..52b2fda66e63 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -824,7 +824,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) struct kvec *iov = rcvbuf->head; size_t hdrlen; u32 len, recvd; - char *kaddr; int status; status = ntohl(*p++); @@ -857,10 +856,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) return -EIO; } - /* NULL terminate the string we got */ - kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0); - kaddr[len+rcvbuf->page_base] = '\0'; - kunmap_atomic(kaddr, KM_USER0); + xdr_terminate_string(rcvbuf, len); return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b0bd7efe8100..86ab69eb149c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4299,7 +4299,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) size_t hdrlen; u32 len, recvd; __be32 *p; - char *kaddr; int status; status = decode_op_hdr(xdr, OP_READLINK); @@ -4330,9 +4329,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) * and and null-terminate the text (the VFS expects * null-termination). */ - kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); - kaddr[len+rcvbuf->page_base] = '\0'; - kunmap_atomic(kaddr, KM_USER0); + xdr_terminate_string(rcvbuf, len); return 0; out_overflow: print_overflow_msg(__func__, xdr); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 35cf2e8cd7c6..8c1dcbb54d89 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -108,6 +108,7 @@ void xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int, unsigned int); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); +void xdr_terminate_string(struct xdr_buf *, const u32); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 3bbef7f5f826..e0725d9d8107 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp, } EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); +/** + * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf + * @buf: XDR buffer where string resides + * @len: length of string, in bytes + * + */ +void +xdr_terminate_string(struct xdr_buf *buf, const u32 len) +{ + char *kaddr; + + kaddr = kmap_atomic(buf->pages[0], KM_USER0); + kaddr[buf->page_base + len] = '\0'; + kunmap_atomic(kaddr, KM_USER0); +} +EXPORT_SYMBOL(xdr_terminate_string); + void xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, unsigned int len) -- cgit v1.2.3 From 5eebde23223aeb0ad2d9e3be6590ff8bbfab0fc2 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Thu, 23 Sep 2010 08:55:58 -0400 Subject: nfs: introduce mount option '-olocal_lock' to make locks local NFS clients since 2.6.12 support flock locks by emulating fcntl byte-range locks. Due to this, some windows applications which seem to use both flock (share mode lock mapped as flock by Samba) and fcntl locks sequentially on the same file, can't lock as they falsely assume the file is already locked. The problem was reported on a setup with windows clients accessing excel files on a Samba exported share which is originally a NFS mount from a NetApp filer. Older NFS clients (< 2.6.12) did not see this problem as flock locks were considered local. To support legacy flock behavior, this patch adds a mount option "-olocal_lock=" which can take the following values: 'none' - Neither flock locks nor POSIX locks are local 'flock' - flock locks are local 'posix' - fcntl/POSIX locks are local 'all' - Both flock locks and POSIX locks are local Testing: - This patch was tested by using -olocal_lock option with different values and the NLM calls were noted from the network packet captured. 'none' - NLM calls were seen during both flock() and fcntl(), flock lock was granted, fcntl was denied 'flock' - no NLM calls for flock(), NLM call was seen for fcntl(), granted 'posix' - NLM call was seen for flock() - granted, no NLM call for fcntl() 'all' - no NLM calls were seen during both flock() and fcntl() - No bugs were seen during NFSv4 locking/unlocking in general and NFSv4 reboot recovery. Cc: Neil Brown Signed-off-by: Suresh Jayaraman Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 6 +++-- fs/nfs/file.c | 45 +++++++++++++++++++++++++----------- fs/nfs/super.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/nfs_mount.h | 3 +++ 4 files changed, 97 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4e7df2adb212..5f01f42b3991 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -635,7 +635,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp, */ static void nfs_destroy_server(struct nfs_server *server) { - if (!(server->flags & NFS_MOUNT_NONLM)) + if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || + !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) nlmclnt_done(server->nlm_host); } @@ -657,7 +658,8 @@ static int nfs_start_lockd(struct nfs_server *server) if (nlm_init.nfs_version > 3) return 0; - if (server->flags & NFS_MOUNT_NONLM) + if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) && + (server->flags & NFS_MOUNT_LOCAL_FCNTL)) return 0; switch (clp->cl_proto) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index eb51bd6201da..59cbe1ba0511 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -684,7 +684,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, return ret; } -static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status = 0; @@ -699,7 +700,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) if (nfs_have_delegation(inode, FMODE_READ)) goto out_noconflict; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + if (is_local) goto out_noconflict; status = NFS_PROTO(inode)->lock(filp, cmd, fl); @@ -730,7 +731,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) return res; } -static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status; @@ -745,15 +747,19 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. */ - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + /* + * Use local locking if mounted with "-onolock" or with appropriate + * "-olocal_lock=" + */ + if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); return status; } -static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status; @@ -766,8 +772,11 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) if (status != 0) goto out; - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + /* + * Use local locking if mounted with "-onolock" or with appropriate + * "-olocal_lock=" + */ + if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); @@ -791,6 +800,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; + int is_local = 0; dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", filp->f_path.dentry->d_parent->d_name.name, @@ -804,6 +814,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) goto out_err; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL) + is_local = 1; + if (NFS_PROTO(inode)->lock_check_bounds != NULL) { ret = NFS_PROTO(inode)->lock_check_bounds(fl); if (ret < 0) @@ -811,11 +824,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) } if (IS_GETLK(cmd)) - ret = do_getlk(filp, cmd, fl); + ret = do_getlk(filp, cmd, fl, is_local); else if (fl->fl_type == F_UNLCK) - ret = do_unlk(filp, cmd, fl); + ret = do_unlk(filp, cmd, fl, is_local); else - ret = do_setlk(filp, cmd, fl); + ret = do_setlk(filp, cmd, fl, is_local); out_err: return ret; } @@ -825,6 +838,9 @@ out_err: */ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { + struct inode *inode = filp->f_mapping->host; + int is_local = 0; + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, @@ -833,14 +849,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) + is_local = 1; + /* We're simulating flock() locks using posix locks on the server */ fl->fl_owner = (fl_owner_t)filp; fl->fl_start = 0; fl->fl_end = OFFSET_MAX; if (fl->fl_type == F_UNLCK) - return do_unlk(filp, cmd, fl); - return do_setlk(filp, cmd, fl); + return do_unlk(filp, cmd, fl, is_local); + return do_setlk(filp, cmd, fl, is_local); } /* diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ec3966e4706b..2e866d86c220 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -100,6 +100,7 @@ enum { Opt_addr, Opt_mountaddr, Opt_clientaddr, Opt_lookupcache, Opt_fscache_uniq, + Opt_local_lock, /* Special mount options */ Opt_userspace, Opt_deprecated, Opt_sloppy, @@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_lookupcache, "lookupcache=%s" }, { Opt_fscache_uniq, "fsc=%s" }, + { Opt_local_lock, "local_lock=%s" }, { Opt_err, NULL } }; @@ -236,6 +238,22 @@ static match_table_t nfs_lookupcache_tokens = { { Opt_lookupcache_err, NULL } }; +enum { + Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, + Opt_local_lock_none, + + Opt_local_lock_err +}; + +static match_table_t nfs_local_lock_tokens = { + { Opt_local_lock_all, "all" }, + { Opt_local_lock_flock, "flock" }, + { Opt_local_lock_posix, "posix" }, + { Opt_local_lock_none, "none" }, + + { Opt_local_lock_err, NULL } +}; + static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); @@ -1009,9 +1027,13 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_lock: mnt->flags &= ~NFS_MOUNT_NONLM; + mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); break; case Opt_nolock: mnt->flags |= NFS_MOUNT_NONLM; + mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); break; case Opt_v2: mnt->flags &= ~NFS_MOUNT_VER3; @@ -1412,6 +1434,34 @@ static int nfs_parse_mount_options(char *raw, mnt->fscache_uniq = string; mnt->options |= NFS_OPTION_FSCACHE; break; + case Opt_local_lock: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_local_lock_tokens, + args); + kfree(string); + switch (token) { + case Opt_local_lock_all: + mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_local_lock_flock: + mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; + break; + case Opt_local_lock_posix: + mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; + break; + case Opt_local_lock_none: + mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + default: + dfprintk(MOUNT, "NFS: invalid " + "local_lock argument\n"); + return 0; + }; + break; /* * Special options @@ -1817,6 +1867,12 @@ static int nfs_validate_mount_data(void *options, if (!args->nfs_server.hostname) goto out_nomem; + if (!(data->flags & NFS_MOUNT_NONLM)) + args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + else + args->flags |= (NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the @@ -2433,7 +2489,8 @@ static void nfs4_fill_super(struct super_block *sb) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { - args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); + args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| + NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); } static int nfs4_validate_text_mount_data(void *options, diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 5d59ae861aa6..576bddd72e04 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -71,4 +71,7 @@ struct nfs_mount_data { #define NFS_MOUNT_NORESVPORT 0x40000 #define NFS_MOUNT_LEGACY_INTERFACE 0x80000 +#define NFS_MOUNT_LOCAL_FLOCK 0x100000 +#define NFS_MOUNT_LOCAL_FCNTL 0x200000 + #endif -- cgit v1.2.3 From dfb4f309830359352539919f23accc59a20a3758 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 24 Sep 2010 09:17:01 -0400 Subject: NFSv4.1: keep seq_res.sr_slot as pointer rather than an index Having to explicitly initialize sr_slotid to NFS4_MAX_SLOT_TABLE resulted in numerous bugs. Keeping the current slot as a pointer to the slot table is more straight forward and robust as it's implicitly set up to NULL wherever the seq_res member is initialized to zeroes. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 52 ++++++++++++++++++++----------------------------- fs/nfs/nfs4xdr.c | 6 ++---- fs/nfs/read.c | 1 - fs/nfs/unlink.c | 3 +-- fs/nfs/write.c | 2 -- include/linux/nfs_xdr.h | 2 +- 6 files changed, 25 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 72aa7067b85d..aee1bcc1fcc9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -334,10 +334,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot) { + int free_slotid = free_slot - tbl->slots; int slotid = free_slotid; + BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -379,7 +381,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) struct nfs4_slot_table *tbl; tbl = &res->sr_session->fc_slot_table; - if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { + if (!res->sr_slot) { /* just wake up the next guy waiting since * we may have not consumed a slot after all */ dprintk("%s: No slot\n", __func__); @@ -387,17 +389,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) } spin_lock(&tbl->slot_tbl_lock); - nfs4_free_slot(tbl, res->sr_slotid); + nfs4_free_slot(tbl, res->sr_slot); nfs41_check_drain_session_complete(res->sr_session); spin_unlock(&tbl->slot_tbl_lock); - res->sr_slotid = NFS4_MAX_SLOT_TABLE; + res->sr_slot = NULL; } static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { unsigned long timestamp; - struct nfs4_slot_table *tbl; - struct nfs4_slot *slot; struct nfs_client *clp; /* @@ -410,17 +410,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * res->sr_status = NFS_OK; /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ - if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) + if (!res->sr_slot) goto out; - tbl = &res->sr_session->fc_slot_table; - slot = tbl->slots + res->sr_slotid; - /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++slot->seq_nr; + ++res->sr_slot->seq_nr; timestamp = res->sr_renewal_time; clp = res->sr_session->clp; do_renew_lease(clp, timestamp); @@ -433,12 +430,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * returned NFS4ERR_DELAY as per Section 2.10.6.2 * of RFC5661. */ - dprintk("%s: slot=%d seq=%d: Operation in progress\n", - __func__, res->sr_slotid, slot->seq_nr); + dprintk("%s: slot=%ld seq=%d: Operation in progress\n", + __func__, + res->sr_slot - res->sr_session->fc_slot_table.slots, + res->sr_slot->seq_nr); goto out_retry; default: /* Just update the slot sequence no. */ - ++slot->seq_nr; + ++res->sr_slot->seq_nr; } out: /* The session may be reset by one of the error handlers. */ @@ -505,10 +504,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session, dprintk("--> %s\n", __func__); /* slot already allocated? */ - if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) + if (res->sr_slot != NULL) return 0; - res->sr_slotid = NFS4_MAX_SLOT_TABLE; tbl = &session->fc_slot_table; spin_lock(&tbl->slot_tbl_lock); @@ -550,7 +548,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); res->sr_session = session; - res->sr_slotid = slotid; + res->sr_slot = slot; res->sr_renewal_time = jiffies; res->sr_status_flags = 0; /* @@ -576,8 +574,9 @@ int nfs4_setup_sequence(const struct nfs_server *server, goto out; } - dprintk("--> %s clp %p session %p sr_slotid %d\n", - __func__, session->clp, session, res->sr_slotid); + dprintk("--> %s clp %p session %p sr_slot %ld\n", + __func__, session->clp, session, res->sr_slot ? + res->sr_slot - session->fc_slot_table.slots : -1); ret = nfs41_setup_sequence(session, args, res, cache_reply, task); @@ -650,7 +649,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, .callback_data = &data }; - res->sr_slotid = NFS4_MAX_SLOT_TABLE; + res->sr_slot = NULL; if (privileged) task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); @@ -735,7 +734,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); nfs_fattr_init(&p->dir_attr); - p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, @@ -1975,7 +1973,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; path_get(path); calldata->path = *path; @@ -2550,7 +2547,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) args->bitmask = server->cache_consistency_bitmask; res->server = server; - res->seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + res->seq_res.sr_slot = NULL; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; } @@ -2576,7 +2573,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; arg->bitmask = server->attr_bitmask; res->server = server; - res->seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, @@ -3646,7 +3642,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co memcpy(&data->stateid, stateid, sizeof(data->stateid)); data->res.fattr = &data->fattr; data->res.server = server; - data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; @@ -3799,7 +3794,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.fl = &p->fl; p->arg.seqid = seqid; p->res.seqid = seqid; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3979,7 +3973,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id.id; p->res.lock_seqid = p->arg.lock_seqid; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->lsp = lsp; p->server = server; atomic_inc(&lsp->ls_count); @@ -4612,7 +4605,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5105,12 +5097,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ if (!atomic_inc_not_zero(&clp->cl_count)) return ERR_PTR(-EIO); - calldata = kmalloc(sizeof(*calldata), GFP_NOFS); + calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) { nfs_put_client(clp); return ERR_PTR(-ENOMEM); } - calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE; msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5242,7 +5233,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) goto out; calldata->clp = clp; calldata->arg.one_fs = 0; - calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 86ab69eb149c..3feace66b981 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4665,7 +4665,6 @@ static int decode_sequence(struct xdr_stream *xdr, struct rpc_rqst *rqstp) { #if defined(CONFIG_NFS_V4_1) - struct nfs4_slot *slot; struct nfs4_sessionid id; u32 dummy; int status; @@ -4697,15 +4696,14 @@ static int decode_sequence(struct xdr_stream *xdr, goto out_overflow; /* seqid */ - slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; dummy = be32_to_cpup(p++); - if (dummy != slot->seq_nr) { + if (dummy != res->sr_slot->seq_nr) { dprintk("%s Invalid sequence number\n", __func__); goto out_err; } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slotid) { + if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 87adc2744246..79859c81a943 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -46,7 +46,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 47530aacebfd..9a16bad5d2ea 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -262,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) status = PTR_ERR(data->cred); goto out_free; } - data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; data->res.dir_attr = &data->dir_attr; status = -EBUSY; @@ -427,7 +426,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, .flags = RPC_TASK_ASYNC, }; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); task_setup_data.callback_data = data, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 874972d9427c..4d6d35dd2b4b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } return p; } @@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 172df83ac54b..5772b2c2f063 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -170,7 +170,7 @@ struct nfs4_sequence_args { struct nfs4_sequence_res { struct nfs4_session *sr_session; - u8 sr_slotid; /* slot used to send request */ + struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ unsigned long sr_renewal_time; u32 sr_status_flags; -- cgit v1.2.3 From 955a857e062642cd3ebe1dc7bb38c0f85d8f8f17 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 29 Sep 2010 15:41:49 -0400 Subject: NFS: new idmapper This patch creates a new idmapper system that uses the request-key function to place a call into userspace to map user and group ids to names. The old idmapper was single threaded, which prevented more than one request from running at a single time. This means that a user would have to wait for an upcall to finish before accessing a cached result. The upcall result is stored on a keyring of type id_resolver. See the file Documentation/filesystems/nfs/idmapper.txt for instructions. Signed-off-by: Bryan Schumaker [Trond: fix up the return value of nfs_idmap_lookup_name and clean up code] Signed-off-by: Trond Myklebust --- Documentation/filesystems/nfs/00-INDEX | 2 + Documentation/filesystems/nfs/idmapper.txt | 67 +++++++++ fs/nfs/Kconfig | 11 ++ fs/nfs/idmap.c | 211 ++++++++++++++++++++++++++++- fs/nfs/inode.c | 7 + fs/nfs/nfs4xdr.c | 4 +- fs/nfs/sysctl.c | 2 + include/linux/nfs_idmap.h | 31 ++++- 8 files changed, 329 insertions(+), 6 deletions(-) create mode 100644 Documentation/filesystems/nfs/idmapper.txt (limited to 'include') diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 2f68cd688769..3225a5662114 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -14,3 +14,5 @@ nfsroot.txt - short guide on setting up a diskless box with NFS root filesystem. rpc-cache.txt - introduction to the caching mechanisms in the sunrpc layer. +idmapper.txt + - information for configuring request-keys to be used by idmapper diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt new file mode 100644 index 000000000000..c3852041a21f --- /dev/null +++ b/Documentation/filesystems/nfs/idmapper.txt @@ -0,0 +1,67 @@ + +========= +ID Mapper +========= +Id mapper is used by NFS to translate user and group ids into names, and to +translate user and group names into ids. Part of this translation involves +performing an upcall to userspace to request the information. Id mapper will +user request-key to perform this upcall and cache the result. The program +/usr/sbin/nfs.upcall should be called by request-key, and will perform the +translation and initialize a key with the resulting information. + + NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this + feature. + +=========== +Configuring +=========== +The file /etc/request-key.conf will need to be modified so /sbin/request-key can +direct the upcall. The following line should be added: + +#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ... +#====== ======= =============== =============== =============================== +create id_resolver * * /usr/sbin/nfs.upcall %k %d 600 + +This will direct all id_resolver requests to the program /usr/sbin/nfs.upcall. +The last parameter, 600, defines how many seconds into the future the key will +expire. This parameter is optional for /usr/sbin/nfs.upcall. When the timeout +is not specified, nfs.upcall will default to 600 seconds. + +id mapper uses for key descriptions: + uid: Find the UID for the given user + gid: Find the GID for the given group + user: Find the user name for the given UID + group: Find the group name for the given GID + +You can handle any of these individually, rather than using the generic upcall +program. If you would like to use your own program for a uid lookup then you +would edit your request-key.conf so it look similar to this: + +#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ... +#====== ======= =============== =============== =============================== +create id_resolver uid:* * /some/other/program %k %d 600 +create id_resolver * * /usr/sbin/nfs.upcall %k %d 600 + +Notice that the new line was added above the line for the generic program. +request-key will find the first matching line and corresponding program. In +this case, /some/other/program will handle all uid lookups and +/usr/sbin/nfs.upcall will handle gid, user, and group lookups. + +See for more information about the +request-key function. + + +========== +nfs.upcall +========== +nfs.upcall is designed to be called by request-key, and should not be run "by +hand". This program takes two arguments, a serialized key and a key +description. The serialized key is first converted into a key_serial_t, and +then passed as an argument to keyctl_instantiate (both are part of keyutils.h). + +The actual lookups are performed by functions found in nfsidmap.h. nfs.upcall +determines the correct function to call by looking at the first part of the +description string. For example, a uid lookup description will appear as +"uid:user@domain". + +nfs.upcall will return 0 if the key was instantiated, and non-zero otherwise. diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6c2aad49d731..3f69752d6f18 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -116,3 +116,14 @@ config NFS_USE_KERNEL_DNS select DNS_RESOLVER select KEYS default y + +config NFS_USE_NEW_IDMAPPER + bool "Use the new idmapper upcall routine" + depends on NFS_V4 && KEYS + help + Say Y here if you want NFS to use the new idmapper upcall functions. + You will need /sbin/request-key (usually provided by the keyutils + package). For details, read + . + + If you are unsure, say N. diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 21a84d45916f..dec47ed8b6b9 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -34,6 +34,212 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFS_UINT_MAXLEN 11 + +const struct cred *id_resolver_cache; + +struct key_type key_type_id_resolver = { + .name = "id_resolver", + .instantiate = user_instantiate, + .match = user_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, +}; + +int nfs_idmap_init(void) +{ + struct cred *cred; + struct key *keyring; + int ret = 0; + + printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); + + cred = prepare_kernel_cred(NULL); + if (!cred) + return -ENOMEM; + + keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto failed_put_cred; + } + + ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + if (ret < 0) + goto failed_put_key; + + ret = register_key_type(&key_type_id_resolver); + if (ret < 0) + goto failed_put_key; + + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + id_resolver_cache = cred; + return 0; + +failed_put_key: + key_put(keyring); +failed_put_cred: + put_cred(cred); + return ret; +} + +void nfs_idmap_quit(void) +{ + key_revoke(id_resolver_cache->thread_keyring); + unregister_key_type(&key_type_id_resolver); + put_cred(id_resolver_cache); +} + +/* + * Assemble the description to pass to request_key() + * This function will allocate a new string and update dest to point + * at it. The caller is responsible for freeing dest. + * + * On error 0 is returned. Otherwise, the length of dest is returned. + */ +static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, + const char *type, size_t typelen, char **desc) +{ + char *cp; + size_t desclen = typelen + namelen + 2; + + *desc = kmalloc(desclen, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + cp = *desc; + memcpy(cp, type, typelen); + cp += typelen; + *cp++ = ':'; + + memcpy(cp, name, namelen); + cp += namelen; + *cp = '\0'; + return desclen; +} + +static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, + const char *type, void *data, size_t data_size) +{ + const struct cred *saved_cred; + struct key *rkey; + char *desc; + struct user_key_payload *payload; + ssize_t ret; + + ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); + if (ret <= 0) + goto out; + + saved_cred = override_creds(id_resolver_cache); + rkey = request_key(&key_type_id_resolver, desc, ""); + revert_creds(saved_cred); + kfree(desc); + if (IS_ERR(rkey)) { + ret = PTR_ERR(rkey); + goto out; + } + + rcu_read_lock(); + rkey->perm |= KEY_USR_VIEW; + + ret = key_validate(rkey); + if (ret < 0) + goto out_up; + + payload = rcu_dereference(rkey->payload.data); + if (IS_ERR_OR_NULL(payload)) { + ret = PTR_ERR(payload); + goto out_up; + } + + ret = payload->datalen; + if (ret > 0 && ret <= data_size) + memcpy(data, payload->data, ret); + else + ret = -EINVAL; + +out_up: + rcu_read_unlock(); + key_put(rkey); +out: + return ret; +} + + +/* ID -> Name */ +static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) +{ + char id_str[NFS_UINT_MAXLEN]; + int id_len; + ssize_t ret; + + id_len = snprintf(id_str, sizeof(id_str), "%u", id); + ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); + if (ret < 0) + return -EINVAL; + return ret; +} + +/* Name -> ID */ +static int nfs_idmap_lookup_id(const char *name, size_t namelen, + const char *type, __u32 *id) +{ + char id_str[NFS_UINT_MAXLEN]; + long id_long; + ssize_t data_size; + int ret = 0; + + data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); + if (data_size <= 0) { + ret = -EINVAL; + } else { + ret = strict_strtol(id_str, 10, &id_long); + *id = (__u32)id_long; + } + return ret; +} + +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + return nfs_idmap_lookup_id(name, namelen, "uid", uid); +} + +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) +{ + return nfs_idmap_lookup_id(name, namelen, "gid", gid); +} + +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) +{ + return nfs_idmap_lookup_name(uid, "user", buf, buflen); +} +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) +{ + return nfs_idmap_lookup_name(gid, "group", buf, buflen); +} + +#else /* CONFIG_NFS_USE_IDMAPPER not defined */ + #include #include #include @@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); } -int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); } +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 18be041abd23..f2d2c801e0af 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1526,6 +1526,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_idmap_init(); + if (err < 0) + goto out9; + err = nfs_dns_resolver_init(); if (err < 0) goto out8; @@ -1590,6 +1594,8 @@ out6: out7: nfs_dns_resolver_destroy(); out8: + nfs_idmap_quit(); +out9: return err; } @@ -1602,6 +1608,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); nfs_dns_resolver_destroy(); + nfs_idmap_quit(); #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3feace66b981..6ea5c9392fe4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -816,7 +816,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); if (owner_namelen < 0) { dprintk("nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -828,7 +828,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); if (owner_grouplen < 0) { dprintk("nfs: couldn't resolve gid %d to string\n", iap->ia_gid); diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index ad4d2e787b20..978aaeb8a093 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = { .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, +#ifndef CONFIG_NFS_USE_NEW_IDMAPPER { .procname = "idmap_cache_timeout", .data = &nfs_idmap_cache_timeout, @@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ #endif { .procname = "nfs_mountpoint_timeout", diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 91a1c24e0cbf..e8352dc5afb5 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -66,13 +66,40 @@ struct idmap_msg { /* Forward declaration to make this header independent of others */ struct nfs_client; +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER + +int nfs_idmap_init(void); +void nfs_idmap_quit(void); + +static inline int nfs_idmap_new(struct nfs_client *clp) +{ + return 0; +} + +static inline void nfs_idmap_delete(struct nfs_client *clp) +{ +} + +#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ + +static inline int nfs_idmap_init(void) +{ + return 0; +} + +static inline void nfs_idmap_quit(void) +{ +} + int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ + int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs_client *, __u32, char *); -int nfs_map_gid_to_group(struct nfs_client *, __u32, char *); +int nfs_map_uid_to_name(struct nfs_client *, __u32, char *, size_t); +int nfs_map_gid_to_group(struct nfs_client *, __u32, char *, size_t); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ -- cgit v1.2.3 From ba8e452a4fe64a51b74d43761e14d99f0666cc45 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Oct 2010 19:58:49 -0400 Subject: SUNRPC: Add a helper function xdr_inline_peek We sometimes need to be able to read ahead in an xdr_stream without incrementing the current pointer position. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8c1dcbb54d89..ab91d86565fd 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -201,6 +201,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index e0725d9d8107..cd9e841e7492 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -572,6 +572,27 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_decode); +/** + * xdr_inline_peek - Allow read-ahead in the XDR data stream + * @xdr: pointer to xdr_stream struct + * @nbytes: number of bytes of data to decode + * + * Check if the input buffer is long enough to enable us to decode + * 'nbytes' more bytes of data starting at the current position. + * If so return the current pointer without updating the current + * pointer position. + */ +__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes) +{ + __be32 *p = xdr->p; + __be32 *q = p + XDR_QUADLEN(nbytes); + + if (unlikely(q > xdr->end || q < p)) + return NULL; + return p; +} +EXPORT_SYMBOL_GPL(xdr_inline_peek); + /** * xdr_inline_decode - Retrieve non-page XDR data to decode * @xdr: pointer to xdr_stream struct -- cgit v1.2.3 From babddc72a9468884ce1a23db3c3d54b0afa299f0 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Oct 2010 15:44:29 -0400 Subject: NFS: decode_dirent should use an xdr_stream Convert nfs*xdr.c to use an xdr stream in decode_dirent. This will prevent a kernel oops that has been occuring when reading a vmapped page. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 29 ++++++++++----- fs/nfs/internal.h | 6 ++-- fs/nfs/nfs2xdr.c | 39 ++++++++++++++++++--- fs/nfs/nfs3xdr.c | 93 +++++++++++++++++++++++++++++++++++++++++++++---- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4xdr.c | 71 ++++++++++++++++++++++++++++--------- include/linux/nfs_xdr.h | 2 +- 7 files changed, 202 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fd30f185ec01..88cbcda76856 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -171,7 +171,7 @@ struct nfs_cache_array { #define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) -typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); +typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int); typedef struct { struct file *file; struct page *page; @@ -357,13 +357,11 @@ error: /* Fill in an entry based on the xdr code stored in desc->page */ static -int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, __be32 **ptr) +int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream) { - __be32 *p = *ptr; - p = desc->decode(p, entry, desc->plus); + __be32 *p = desc->decode(stream, entry, desc->plus); if (IS_ERR(p)) return PTR_ERR(p); - *ptr = p; entry->fattr->time_start = desc->timestamp; entry->fattr->gencount = desc->gencount; @@ -438,10 +436,23 @@ out: /* Perform conversion from xdr to cache array */ static void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, - struct page *xdr_page, struct page *page) + struct page *xdr_page, struct page *page, unsigned int buflen) { + struct xdr_stream stream; + struct xdr_buf buf; __be32 *ptr = kmap(xdr_page); - while (xdr_decode(desc, entry, &ptr) == 0) { + + buf.head->iov_base = xdr_page; + buf.head->iov_len = buflen; + buf.tail->iov_len = 0; + buf.page_base = 0; + buf.page_len = 0; + buf.buflen = buf.head->iov_len; + buf.len = buf.head->iov_len; + + xdr_init_decode(&stream, &buf, ptr); + + while (xdr_decode(desc, entry, &stream) == 0) { if (nfs_readdir_add_to_array(entry, page) == -1) break; if (desc->plus == 1) @@ -458,6 +469,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct file *file = desc->file; struct nfs_cache_array *array; int status = 0; + unsigned int array_size = 1; entry.prev_cookie = 0; entry.cookie = *desc->dir_cookie; @@ -476,9 +488,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, goto out_release_array; do { status = nfs_readdir_xdr_filler(xdr_page, desc, &entry, file, inode); + if (status < 0) break; - nfs_readdir_page_filler(desc, &entry, xdr_page, page); + nfs_readdir_page_filler(desc, &entry, xdr_page, page, array_size * PAGE_SIZE); } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); put_page(xdr_page); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c961bc92c107..74b015598a43 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -181,15 +181,15 @@ extern void nfs_destroy_directcache(void); /* nfs2xdr.c */ extern int nfs_stat_to_errno(int); extern struct rpc_procinfo nfs_procedures[]; -extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); +extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); /* nfs3xdr.c */ extern struct rpc_procinfo nfs3_procedures[]; -extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); +extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 -extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); +extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *entry, int plus); #endif #ifdef CONFIG_NFS_V4_1 extern const u32 nfs41_maxread_overhead; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 79c74387a2fe..0210c752e743 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -500,25 +500,56 @@ err_unmap: goto out; } +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) +{ + dprintk("nfs: %s: prematurely hit end of receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); +} + __be32 * -nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { - if (!*p++) { - if (!*p) + __be32 *p; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (!ntohl(*p++)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (!ntohl(*p++)) return ERR_PTR(-EAGAIN); entry->eof = 1; return ERR_PTR(-EBADCOOKIE); } + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + entry->ino = ntohl(*p++); entry->len = ntohl(*p++); + + p = xdr_inline_decode(xdr, entry->len + 4); + if (unlikely(!p)) + goto out_overflow; entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; entry->cookie = ntohl(*p++); - entry->eof = !p[0] && p[1]; + + p = xdr_inline_peek(xdr, 8); + if (p != NULL) + entry->eof = !p[0] && p[1]; + else + entry->eof = 0; return p; + +out_overflow: + print_overflow_msg(__func__, xdr); + return ERR_PTR(-EIO); } /* diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 52b2fda66e63..d562c8d9d56e 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = { [NF3FIFO] = S_IFIFO, }; +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) +{ + dprintk("nfs: %s: prematurely hit end of receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); +} + /* * Common NFS XDR functions as inlines */ @@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh) return NULL; } +static inline __be32 * +xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + __be32 *p; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + fh->size = ntohl(*p++); + + if (fh->size <= NFS3_FHSIZE) { + p = xdr_inline_decode(xdr, fh->size); + if (unlikely(!p)) + goto out_overflow; + memcpy(fh->data, p, fh->size); + return p + XDR_QUADLEN(fh->size); + } + return NULL; + +out_overflow: + print_overflow_msg(__func__, xdr); + return ERR_PTR(-EIO); +} + /* * Encode/decode time. */ @@ -240,6 +270,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr) return p; } +static inline __be32 * +xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (ntohl(*p++)) { + p = xdr_inline_decode(xdr, 84); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_fattr(p, fattr); + } + return p; +out_overflow: + print_overflow_msg(__func__, xdr); + return ERR_PTR(-EIO); +} + static inline __be32 * xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) { @@ -616,19 +666,33 @@ err_unmap: } __be32 * -nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { + __be32 *p; struct nfs_entry old = *entry; - if (!*p++) { - if (!*p) + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (!ntohl(*p++)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (!ntohl(*p++)) return ERR_PTR(-EAGAIN); entry->eof = 1; return ERR_PTR(-EBADCOOKIE); } + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; p = xdr_decode_hyper(p, &entry->ino); entry->len = ntohl(*p++); + + p = xdr_inline_decode(xdr, entry->len + 8); + if (unlikely(!p)) + goto out_overflow; entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; @@ -636,10 +700,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) if (plus) { entry->fattr->valid = 0; - p = xdr_decode_post_op_attr(p, entry->fattr); + p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); + if (IS_ERR(p)) + goto out_overflow_exit; /* In fact, a post_op_fh3: */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; if (*p++) { - p = xdr_decode_fhandle(p, entry->fh); + p = xdr_decode_fhandle_stream(xdr, entry->fh); + if (IS_ERR(p)) + goto out_overflow_exit; /* Ugh -- server reply was truncated */ if (p == NULL) { dprintk("NFS: FH truncated\n"); @@ -650,8 +721,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } - entry->eof = !p[0] && p[1]; + p = xdr_inline_peek(xdr, 8); + if (p != NULL) + entry->eof = !p[0] && p[1]; + else + entry->eof = 0; + return p; + +out_overflow: + print_overflow_msg(__func__, xdr); +out_overflow_exit: + return ERR_PTR(-EIO); } /* diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d24a8e07b5e2..c58ea6377506 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -331,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid); extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ -extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); +extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; struct nfs4_mount_data; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6ea5c9392fe4..a4919e999354 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3950,13 +3950,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) __be32 *p; uint32_t namelen, type; - p = xdr_inline_decode(xdr, 32); + p = xdr_inline_decode(xdr, 32); /* read 32 bytes */ if (unlikely(!p)) goto out_overflow; - p = xdr_decode_hyper(p, &offset); + p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */ p = xdr_decode_hyper(p, &length); - type = be32_to_cpup(p++); - if (fl != NULL) { + type = be32_to_cpup(p++); /* 4 byte read */ + if (fl != NULL) { /* manipulate file lock */ fl->fl_start = (loff_t)offset; fl->fl_end = fl->fl_start + (loff_t)length - 1; if (length == ~(uint64_t)0) @@ -3966,9 +3966,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) fl->fl_type = F_RDLCK; fl->fl_pid = 0; } - p = xdr_decode_hyper(p, &clientid); - namelen = be32_to_cpup(p); - p = xdr_inline_decode(xdr, namelen); + p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */ + namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */ + p = xdr_inline_decode(xdr, namelen); /* variable size field */ if (likely(p)) return -NFS4ERR_DENIED; out_overflow: @@ -5755,21 +5755,33 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, } #endif /* CONFIG_NFS_V4_1 */ -__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; uint32_t len; - - if (!*p++) { - if (!*p) + __be32 *p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (!ntohl(*p++)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (!ntohl(*p++)) return ERR_PTR(-EAGAIN); entry->eof = 1; return ERR_PTR(-EBADCOOKIE); } + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; entry->prev_cookie = entry->cookie; p = xdr_decode_hyper(p, &entry->cookie); entry->len = ntohl(*p++); + + p = xdr_inline_decode(xdr, entry->len + 4); + if (unlikely(!p)) + goto out_overflow; entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); @@ -5782,29 +5794,54 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) len = ntohl(*p++); /* bitmap length */ if (len-- > 0) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; bitmap[0] = ntohl(*p++); if (len-- > 0) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; bitmap[1] = ntohl(*p++); p += len; } } + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; /* Ignore the return value of rdattr_error for now */ - p++; - len--; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; } - if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) + if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; xdr_decode_hyper(p, &entry->ino); - else if (bitmap[0] == FATTR4_WORD0_FILEID) + } else if (bitmap[0] == FATTR4_WORD0_FILEID) { + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; xdr_decode_hyper(p, &entry->ino); - p += len; + } } - entry->eof = !p[0] && p[1]; + p = xdr_inline_peek(xdr, 8); + if (p != NULL) + entry->eof = !p[0] && p[1]; + else + entry->eof = 0; + return p; + +out_overflow: + print_overflow_msg(__func__, xdr); + return ERR_PTR(-EIO); } /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5772b2c2f063..ca0e8fd7feec 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1036,7 +1036,7 @@ struct nfs_rpc_ops { int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); - __be32 *(*decode_dirent)(__be32 *, struct nfs_entry *, int plus); + __be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *, struct rpc_message *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); -- cgit v1.2.3 From 56e4ebf877b6043c289bda32a5a7385b80c17dee Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Oct 2010 15:44:37 -0400 Subject: NFS: readdir with vmapped pages We can use vmapped pages to read more information from the network at once. This will reduce the number of calls needed to complete a readdir. Signed-off-by: Bryan Schumaker [trondmy: Added #include for linux/vmalloc.h> in fs/nfs/dir.c] Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 +-- fs/nfs/dir.c | 66 ++++++++++++++++++++++++++++++++++++++++--------- fs/nfs/internal.h | 6 +++++ fs/nfs/nfs3proc.c | 4 +-- fs/nfs/nfs4proc.c | 8 +++--- fs/nfs/proc.c | 4 +-- include/linux/nfs_xdr.h | 2 +- 7 files changed, 71 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5f01f42b3991..876ba8592706 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -903,8 +903,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES) + server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES; if (server->dtsize > server->rsize) server->dtsize = server->rsize; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 88cbcda76856..5d7da3ad4e85 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "delegation.h" #include "iostat.h" @@ -327,7 +328,7 @@ out: /* Fill a page with xdr information before transferring to the cache page */ static -int nfs_readdir_xdr_filler(struct page *xdr_page, nfs_readdir_descriptor_t *desc, +int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct file *file, struct inode *inode) { struct rpc_cred *cred = nfs_file_cred(file); @@ -337,7 +338,7 @@ int nfs_readdir_xdr_filler(struct page *xdr_page, nfs_readdir_descriptor_t *desc again: timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); - error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, xdr_page, + error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ @@ -436,11 +437,11 @@ out: /* Perform conversion from xdr to cache array */ static void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, - struct page *xdr_page, struct page *page, unsigned int buflen) + void *xdr_page, struct page *page, unsigned int buflen) { struct xdr_stream stream; struct xdr_buf buf; - __be32 *ptr = kmap(xdr_page); + __be32 *ptr = xdr_page; buf.head->iov_base = xdr_page; buf.head->iov_len = buflen; @@ -458,18 +459,59 @@ void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *e if (desc->plus == 1) nfs_prime_dcache(desc->file->f_path.dentry, entry); } - kunmap(xdr_page); +} + +static +void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages) +{ + unsigned int i; + for (i = 0; i < npages; i++) + put_page(pages[i]); +} + +static +void nfs_readdir_free_large_page(void *ptr, struct page **pages, + unsigned int npages) +{ + vm_unmap_ram(ptr, npages); + nfs_readdir_free_pagearray(pages, npages); +} + +/* + * nfs_readdir_large_page will allocate pages that must be freed with a call + * to nfs_readdir_free_large_page + */ +static +void *nfs_readdir_large_page(struct page **pages, unsigned int npages) +{ + void *ptr; + unsigned int i; + + for (i = 0; i < npages; i++) { + struct page *page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out_freepages; + pages[i] = page; + } + + ptr = vm_map_ram(pages, NFS_MAX_READDIR_PAGES, 0, PAGE_KERNEL); + if (!IS_ERR_OR_NULL(ptr)) + return ptr; +out_freepages: + nfs_readdir_free_pagearray(pages, i); + return NULL; } static int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) { - struct page *xdr_page; + struct page *pages[NFS_MAX_READDIR_PAGES]; + void *pages_ptr = NULL; struct nfs_entry entry; struct file *file = desc->file; struct nfs_cache_array *array; int status = 0; - unsigned int array_size = 1; + unsigned int array_size = ARRAY_SIZE(pages); entry.prev_cookie = 0; entry.cookie = *desc->dir_cookie; @@ -483,18 +525,18 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, memset(array, 0, sizeof(struct nfs_cache_array)); array->eof_index = -1; - xdr_page = alloc_page(GFP_KERNEL); - if (!xdr_page) + pages_ptr = nfs_readdir_large_page(pages, array_size); + if (!pages_ptr) goto out_release_array; do { - status = nfs_readdir_xdr_filler(xdr_page, desc, &entry, file, inode); + status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); if (status < 0) break; - nfs_readdir_page_filler(desc, &entry, xdr_page, page, array_size * PAGE_SIZE); + nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); - put_page(xdr_page); + nfs_readdir_free_large_page(pages_ptr, pages, array_size); out_release_array: nfs_readdir_release_array(page); out: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 74b015598a43..7b0e894d00c8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -62,6 +62,12 @@ struct nfs_clone_mount { */ #define NFS_UNSPEC_PORT (-1) +/* + * Maximum number of pages that readdir can use for creating + * a vmapped array of pages. + */ +#define NFS_MAX_READDIR_PAGES 8 + /* * In-kernel mount arguments */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f8446b38dcb4..ce939c062a52 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -630,7 +630,7 @@ out: */ static int nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; __be32 *verf = NFS_COOKIEVERF(dir); @@ -640,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .verf = {verf[0], verf[1]}, .plus = plus, .count = count, - .pages = &page + .pages = pages }; struct nfs3_readdirres res = { .verf = verf, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aa771c1af115..cb33c73206e3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2823,12 +2823,12 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, } static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), - .pages = &page, + .pages = pages, .pgbase = 0, .count = count, .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, @@ -2859,14 +2859,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, } static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), _nfs4_proc_readdir(dentry, cred, cookie, - page, count, plus), + pages, count, plus), &exception); } while (exception.retry); return err; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e5e84aa2af17..58e7f84fc1fd 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -534,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) */ static int nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; struct nfs_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, .count = count, - .pages = &page, + .pages = pages, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ca0e8fd7feec..1b9a17a1f235 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1026,7 +1026,7 @@ struct nfs_rpc_ops { int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, - u64, struct page *, unsigned int, int); + u64, struct page **, unsigned int, int); int (*mknod) (struct inode *, struct dentry *, struct iattr *, dev_t); int (*statfs) (struct nfs_server *, struct nfs_fh *, -- cgit v1.2.3 From 82f2e5472e2304e531c2fa85e457f4a71070044e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 21 Oct 2010 16:33:18 -0400 Subject: NFS: Readdir plus in v4 By requsting more attributes during a readdir, we can mimic the readdir plus operation that was in NFSv3. To test, I ran the command `ls -lU --color=none` on directories with various numbers of files. Without readdir plus, I see this: n files | 100 | 1,000 | 10,000 | 100,000 | 1,000,000 --------+-----------+-----------+-----------+-----------+---------- real | 0m00.153s | 0m00.589s | 0m05.601s | 0m56.691s | 9m59.128s user | 0m00.007s | 0m00.007s | 0m00.077s | 0m00.703s | 0m06.800s sys | 0m00.010s | 0m00.070s | 0m00.633s | 0m06.423s | 1m10.005s access | 3 | 1 | 1 | 4 | 31 getattr | 2 | 1 | 1 | 1 | 1 lookup | 104 | 1,003 | 10,003 | 100,003 | 1,000,003 readdir | 2 | 16 | 158 | 1,575 | 15,749 total | 111 | 1,021 | 10,163 | 101,583 | 1,015,784 With readdir plus enabled, I see this: n files | 100 | 1,000 | 10,000 | 100,000 | 1,000,000 --------+-----------+-----------+-----------+-----------+---------- real | 0m00.115s | 0m00.206s | 0m01.079s | 0m12.521s | 2m07.528s user | 0m00.003s | 0m00.003s | 0m00.040s | 0m00.290s | 0m03.296s sys | 0m00.007s | 0m00.020s | 0m00.120s | 0m01.357s | 0m17.556s access | 3 | 1 | 1 | 1 | 7 getattr | 2 | 1 | 1 | 1 | 1 lookup | 4 | 3 | 3 | 3 | 3 readdir | 6 | 62 | 630 | 6,300 | 62,993 total | 15 | 67 | 635 | 6,305 | 63,004 Readdir plus disabled has about a 16x increase in the number of rpc calls and is 4 - 5 times slower on large directories. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 5 +++-- fs/nfs/dir.c | 4 ++-- fs/nfs/internal.h | 6 +++--- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 55 ++++++++++++++++++++++++------------------------- include/linux/nfs_xdr.h | 3 ++- 9 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 876ba8592706..da2f2f024a4d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1358,8 +1358,9 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| - NFS_CAP_POSIX_LOCK; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; server->options = data->options; /* Get a client record */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0cbb714a09d8..2a768d05a534 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -172,7 +172,7 @@ struct nfs_cache_array { #define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) -typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int); +typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); typedef struct { struct file *file; struct page *page; @@ -360,7 +360,7 @@ error: static int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream) { - __be32 *p = desc->decode(stream, entry, desc->plus); + __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7b0e894d00c8..db08ff3ff454 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -187,15 +187,15 @@ extern void nfs_destroy_directcache(void); /* nfs2xdr.c */ extern int nfs_stat_to_errno(int); extern struct rpc_procinfo nfs_procedures[]; -extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); +extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); /* nfs3xdr.c */ extern struct rpc_procinfo nfs3_procedures[]; -extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); +extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 -extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *entry, int plus); +extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); #endif #ifdef CONFIG_NFS_V4_1 extern const u32 nfs41_maxread_overhead; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 82f026422424..e6bf45710cc7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -454,7 +454,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) } __be32 * -nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) +nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus) { __be32 *p; p = xdr_inline_decode(xdr, 4); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index dc98eb7976c3..31a44df40aea 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -590,7 +590,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res } __be32 * -nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) +nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus) { __be32 *p; struct nfs_entry old = *entry; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c58ea6377506..9fa496387fdf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -331,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid); extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ -extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *entry, int plus); +extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); extern struct rpc_procinfo nfs4_procedures[]; struct nfs4_mount_data; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cb33c73206e3..f5ab216e8870 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2832,6 +2832,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .pgbase = 0, .count = count, .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, + .plus = plus, }; struct nfs4_readdir_res res; struct rpc_message msg = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b7eff205d3d8..ccfb1c92b262 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1385,12 +1385,20 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { - uint32_t attrs[2] = { - FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, - FATTR4_WORD1_MOUNTED_ON_FILEID, - }; + uint32_t attrs[2] = {0, 0}; __be32 *p; + if (readdir->plus) { + attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| + FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; + attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| + FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| + FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| + FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + } + attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; + attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); *p++ = cpu_to_be32(OP_READDIR); p = xdr_encode_hyper(p, readdir->cookie); @@ -1398,11 +1406,15 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ *p++ = cpu_to_be32(readdir->count); *p++ = cpu_to_be32(2); - /* Switch to mounted_on_fileid if the server supports it */ - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - attrs[0] &= ~FATTR4_WORD0_FILEID; - else - attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + + if (!readdir->plus) { + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + } + *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); hdr->nops++; @@ -5768,7 +5780,8 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, } #endif /* CONFIG_NFS_V4_1 */ -__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) +__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + struct nfs_server *server, int plus) { uint32_t bitmap[2] = {0}; uint32_t len; @@ -5824,24 +5837,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int goto out_overflow; len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { - if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { - bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; - /* Ignore the return value of rdattr_error for now */ - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - } - if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) { - p = xdr_inline_decode(xdr, 8); - if (unlikely(!p)) - goto out_overflow; - xdr_decode_hyper(p, &entry->ino); - } else if (bitmap[0] == FATTR4_WORD0_FILEID) { - p = xdr_inline_decode(xdr, 8); - if (unlikely(!p)) - goto out_overflow; - xdr_decode_hyper(p, &entry->ino); - } + if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0) + goto out_overflow; + if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) + entry->ino = entry->fattr->fileid; } p = xdr_inline_peek(xdr, 8); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1b9a17a1f235..efe2eab8ac94 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -778,6 +778,7 @@ struct nfs4_readdir_arg { struct page ** pages; /* zero-copy data */ unsigned int pgbase; /* zero-copy data */ const u32 * bitmask; + int plus; struct nfs4_sequence_args seq_args; }; @@ -1036,7 +1037,7 @@ struct nfs_rpc_ops { int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); - __be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int plus); + __be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int plus); void (*read_setup) (struct nfs_read_data *, struct rpc_message *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); -- cgit v1.2.3 From 6b96724e507fecc3e6440e86426fe4f44359ed66 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Tue, 12 Oct 2010 16:30:05 -0700 Subject: Revalidate caches on lock Instead of blindly zapping the caches, attempt to revalidate them if the server has indicated that it uses high resolution timestamps. NFSv4 should be able to always revalidate the cache since the protocol requires the update of the change attribute on modification of the data. In reality, there are servers (the Linux NFS server for example) that do not obey this requirement and use ctime as the basis for change attribute. Long term, the server needs to be fixed. At this time, and to be on the safe side, continue zapping caches if the server indicates that it does not have a high resolution timestamp. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/file.c | 19 ++++++++++++++++--- fs/nfs/nfs3xdr.c | 3 ++- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index da2f2f024a4d..a63bce8d0596 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -915,6 +915,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * server->maxfilesize = fsinfo->maxfilesize; + server->time_delta = fsinfo->time_delta; + /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 39672b731736..c3f2477c16c1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -757,6 +757,11 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) return status; } +static int +is_time_granular(struct timespec *ts) { + return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); +} + static int do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -781,13 +786,21 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) status = do_vfs_lock(filp, fl); if (status < 0) goto out; + /* - * Make sure we clear the cache whenever we try to get the lock. + * Revalidate the cache if the server has time stamps granular + * enough to detect subsecond changes. Otherwise, clear the + * cache to prevent missing any changes. + * * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!nfs_have_delegation(inode, FMODE_READ)) - nfs_zap_caches(inode); + if (!nfs_have_delegation(inode, FMODE_READ)) { + if (is_time_granular(&NFS_SERVER(inode)->time_delta)) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + nfs_zap_caches(inode); + } out: return status; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 31a44df40aea..d9a5e832c257 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1044,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) res->wtmult = ntohl(*p++); res->dtpref = ntohl(*p++); p = xdr_decode_hyper(p, &res->maxfilesize); + p = xdr_decode_time3(p, &res->time_delta); - /* ignore time_delta and properties */ + /* ignore properties */ res->lease_time = 0; return 0; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c82ee7cd6288..5eef862ec187 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_server { struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ + struct timespec time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ dev_t s_dev; /* superblock dev numbers */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index efe2eab8ac94..da7a1300dc60 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -112,6 +112,7 @@ struct nfs_fsinfo { __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; + struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ }; -- cgit v1.2.3