From 0eb43812c0270ee3d005ff32f91f7d0a6c4943af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Aug 2022 19:44:44 -0400 Subject: NFS: Clear the file access cache upon login POSIX typically only refreshes the user's supplementary group information upon login. Since NFS servers may often refresh their concept of the user supplementary group membership at their own cadence, it is possible for the NFS client's access cache to become stale due to the user's group membership changing on the server after the user has already logged in on the client. While it is reasonable to expect that such group membership changes are rare, and that we do not want to optimise the cache to accommodate them, it is also not unreasonable for the user to expect that if they log out and log back in again, that the staleness would clear up. Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f594dac436a7..bccb04af38ae 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2948,9 +2948,28 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co return NULL; } +static u64 nfs_access_login_time(const struct task_struct *task, + const struct cred *cred) +{ + const struct task_struct *parent; + u64 ret; + + rcu_read_lock(); + for (;;) { + parent = rcu_dereference(task->real_parent); + if (parent == task || cred_fscmp(parent->cred, cred) != 0) + break; + task = parent; + } + ret = task->start_time; + rcu_read_unlock(); + return ret; +} + static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) { struct nfs_inode *nfsi = NFS_I(inode); + u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; bool retry = true; int err; @@ -2978,6 +2997,9 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred * spin_lock(&inode->i_lock); retry = false; } + err = -ENOENT; + if ((s64)(login_time - cache->timestamp) > 0) + goto out; *mask = cache->mask; list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); err = 0; @@ -3057,6 +3079,7 @@ static void nfs_access_add_rbtree(struct inode *inode, else goto found; } + set->timestamp = ktime_get_ns(); rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); -- cgit v1.2.3 From eef7314caf2d73a94b68ba293cd105154d3a664e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2022 16:44:47 -0400 Subject: NFSv4.2: Clear FATTR4_WORD2_SECURITY_LABEL when done decoding We need to clear the FATTR4_WORD2_SECURITY_LABEL bitmap flag irrespective of whether or not the label is too long. Fixes: aa9c2669626c ("NFS: Client implementation of Labeled-NFS") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index acfe5f4bda48..8c5298e37f0f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4234,6 +4234,7 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, p = xdr_inline_decode(xdr, len); if (unlikely(!p)) return -EIO; + bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; if (len < NFS4_MAXLABELLEN) { if (label) { if (label->len) { @@ -4246,7 +4247,6 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, label->lfs = lfs; status = NFS_ATTR_FATTR_V4_SECURITY_LABEL; } - bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); -- cgit v1.2.3 From c8a62f440229ae7a10874776344dfcc17d860336 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2022 17:41:30 -0400 Subject: NFSv4.2: Always decode the security label If the server returns a reply that includes a security label, then we must decode it whether or not we can store the results. Fixes: 1e2f67da8931 ("NFS: Remove the nfs4_label argument from decode_getattr_*() functions") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8c5298e37f0f..9103e022376a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4755,12 +4755,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, if (status < 0) goto xdr_error; - if (fattr->label) { - status = decode_attr_security_label(xdr, bitmap, fattr->label); - if (status < 0) - goto xdr_error; - fattr->valid |= status; - } + status = decode_attr_security_label(xdr, bitmap, fattr->label); + if (status < 0) + goto xdr_error; + fattr->valid |= status; xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); -- cgit v1.2.3 From 43c1031f7110967c240cb6e922adcfc4b8899183 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2022 18:21:14 -0400 Subject: NFSv4.2: Fix a memory stomp in decode_attr_security_label We must not change the value of label->len if it is zero, since that indicates we stored a label. Fixes: b4487b935452 ("nfs: Fix getxattr kernel panic and memory overflow") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9103e022376a..deec76cf5afe 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4236,12 +4236,10 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, return -EIO; bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; if (len < NFS4_MAXLABELLEN) { - if (label) { - if (label->len) { - if (label->len < len) - return -ERANGE; - memcpy(label->label, p, len); - } + if (label && label->len) { + if (label->len < len) + return -ERANGE; + memcpy(label->label, p, len); label->len = len; label->pi = pi; label->lfs = lfs; -- cgit v1.2.3 From c528f70f504434eaff993a5ddd52203a2010d51f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2022 13:12:11 -0400 Subject: NFSv4.2: Fix initialisation of struct nfs4_label The call to nfs4_label_init_security() should return a fully initialised label. Fixes: aa9c2669626c ("NFS: Client implementation of Labeled-NFS") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 86ed5c0142c3..98a867092039 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -122,6 +122,11 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) return NULL; + label->lfs = 0; + label->pi = 0; + label->len = 0; + label->label = NULL; + err = security_dentry_init_security(dentry, sattr->ia_mode, &dentry->d_name, NULL, (void **)&label->label, &label->len); @@ -3796,7 +3801,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr, int *opened) { struct nfs4_state *state; - struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; + struct nfs4_label l, *label; label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); @@ -4682,7 +4687,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_label l, *ilabel = NULL; + struct nfs4_label l, *ilabel; struct nfs_open_context *ctx; struct nfs4_state *state; int status = 0; @@ -5033,7 +5038,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, struct nfs4_exception exception = { .interruptible = true, }; - struct nfs4_label l, *label = NULL; + struct nfs4_label l, *label; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); @@ -5074,7 +5079,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct nfs4_exception exception = { .interruptible = true, }; - struct nfs4_label l, *label = NULL; + struct nfs4_label l, *label; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); @@ -5193,7 +5198,7 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct nfs4_exception exception = { .interruptible = true, }; - struct nfs4_label l, *label = NULL; + struct nfs4_label l, *label; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); -- cgit v1.2.3 From 85aa8ddc3818718208c3cfdfda9c8c908c9dead1 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 20 Sep 2022 13:00:21 -0400 Subject: NFS: Trigger the "ls -l" readdir heuristic sooner Since commit 1a34c8c9a49e ("NFS: Support larger readdir buffers") has updated dtsize, and with recent improvements to the READDIRPLUS helper heuristic, the heuristic may not trigger until many dentries are emitted to userspace. This will cause many thousands of GETATTR calls for "ls -l" when the directory's pagecache has already been populated. This manifests as poor performance for long directory listings after an initially fast "ls -l". Fix this by emitting only 17 entries for any first pass through the NFS directory's ->iterate_shared(), which allows userpace to prime the counters for the heuristic. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bccb04af38ae..ea1ceffa1d3a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1074,6 +1074,8 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) return res; } +#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL) + /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -1083,6 +1085,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, struct file *file = desc->file; struct nfs_cache_array *array; unsigned int i; + bool first_emit = !desc->dir_cookie; array = kmap_local_page(desc->page); for (i = desc->cache_entry_index; i < array->size; i++) { @@ -1106,6 +1109,10 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos++; + if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) { + desc->eob = true; + break; + } } if (array->page_is_eof) desc->eof = !desc->eob; @@ -1187,8 +1194,6 @@ out: return status; } -#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL) - static bool nfs_readdir_handle_cache_misses(struct inode *inode, struct nfs_readdir_descriptor *desc, unsigned int cache_misses, -- cgit v1.2.3 From e83458fce080dc23c25353a1af90bfecf79c7369 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2022 16:50:12 -0400 Subject: NFSv4: Fix a credential leak in _nfs4_discover_trunking() Fixes: 4f40a5b55446 ("NFSv4: Add an fattr allocation to _nfs4_discover_trunking()") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 98a867092039..bd89c7f06952 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4018,7 +4018,7 @@ static int _nfs4_discover_trunking(struct nfs_server *server, page = alloc_page(GFP_KERNEL); if (!page) - return -ENOMEM; + goto out_put_cred; locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); if (!locations) goto out_free; @@ -4040,6 +4040,8 @@ out_free_2: kfree(locations); out_free: __free_page(page); +out_put_cred: + put_cred(cred); return status; } -- cgit v1.2.3 From 51069e4aef6257b0454057359faed0ab0c9af083 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Nov 2022 13:20:01 -0400 Subject: NFSv4: Fix a deadlock between nfs4_open_recover_helper() and delegreturn If we're asked to recover open state while a delegation return is outstanding, then the state manager thread cannot use a cached open, so if the server returns a delegation, we can end up deadlocked behind the pending delegreturn. To avoid this problem, let's just ask the server not to give us a delegation unless we're explicitly reclaiming one. Fixes: be36e185bd26 ("NFSv4: nfs4_open_recover_helper() must set share access") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bd89c7f06952..e51044a5f550 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2131,18 +2131,18 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context } static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, - fmode_t fmode) + fmode_t fmode) { struct nfs4_state *newstate; + struct nfs_server *server = NFS_SB(opendata->dentry->d_sb); + int openflags = opendata->o_arg.open_flags; int ret; if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) return 0; - opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; - opendata->o_arg.share_access = nfs4_map_atomic_open_share( - NFS_SB(opendata->dentry->d_sb), - fmode, 0); + opendata->o_arg.share_access = + nfs4_map_atomic_open_share(server, fmode, openflags); memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -2724,10 +2724,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_FH); + opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); + /* + * We're not recovering a delegation, so ask for no delegation. + * Otherwise the recovery thread could deadlock with an outstanding + * delegation return. + */ + opendata->o_arg.open_flags = O_DIRECT; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) d_drop(ctx->dentry); -- cgit v1.2.3 From 35e3b6ae84935d0d7ff76cbdaa83411b0ad5e471 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2022 17:30:39 -0500 Subject: NFS: Fix an Oops in nfs_d_automount() When mounting from a NFSv4 referral, path->dentry can end up being a negative dentry, so derive the struct nfs_server from the dentry itself instead. Fixes: 2b0143b5c986 ("VFS: normal filesystems (and lustre): d_inode() annotations") Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2f336ace7555..88a23af2bd5c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -147,7 +147,7 @@ struct vfsmount *nfs_d_automount(struct path *path) struct nfs_fs_context *ctx; struct fs_context *fc; struct vfsmount *mnt = ERR_PTR(-ENOMEM); - struct nfs_server *server = NFS_SERVER(d_inode(path->dentry)); + struct nfs_server *server = NFS_SB(path->dentry->d_sb); struct nfs_client *client = server->nfs_client; int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout); int ret; -- cgit v1.2.3 From 5776a9cd2a66cdc037c8e6ee34cde40ee768893d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Nov 2022 11:11:53 -0500 Subject: NFS: Fix a race in nfs_call_unlink() We should check that the filehandles match before transferring the sillyrename data to the newly looked-up dentry in case the name was reused on the server. Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 9697cd5d2561..150a953a8be9 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -139,6 +139,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf */ spin_lock(&alias->d_lock); if (d_really_is_positive(alias) && + !nfs_compare_fh(NFS_FH(inode), NFS_FH(d_inode(alias))) && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; -- cgit v1.2.3 From d01c6ed6db38a4db2921591e4f5425bee1931aca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Dec 2022 14:27:05 -0500 Subject: NFS4.x/pnfs: Fix up logging of layout stateids If the layout is invalid, then just log a '0' value. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 2cff5901c689..e3fbdc8a98eb 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1815,7 +1815,7 @@ TRACE_EVENT(pnfs_update_layout, __entry->count = count; __entry->iomode = iomode; __entry->reason = reason; - if (lo != NULL) { + if (lo != NULL && pnfs_layout_is_valid(lo)) { __entry->layoutstateid_seq = be32_to_cpu(lo->plh_stateid.seqid); __entry->layoutstateid_hash = @@ -1869,7 +1869,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event, __entry->pos = pos; __entry->count = count; __entry->iomode = iomode; - if (lo != NULL) { + if (lo != NULL && pnfs_layout_is_valid(lo)) { __entry->layoutstateid_seq = be32_to_cpu(lo->plh_stateid.seqid); __entry->layoutstateid_hash = -- cgit v1.2.3 From 6f1c1d95dc93b52a8ef9cc1f3f610c2d5e6b217b Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Fri, 23 Sep 2022 13:40:14 +0800 Subject: NFS: make sure open context mode have FMODE_EXEC when file open for exec Because file f_mode never have FMODE_EXEC, open context mode won't get FMODE_EXEC from file f_mode. Open context mode only care about FMODE_READ/ FMODE_WRITE/FMODE_EXEC, and all info about open context mode can be convert from file f_flags, so convert file f_flags to open context mode by flags_to_mode(). Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- fs/nfs/nfs4file.c | 12 ++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6b2cfa59a1a2..e98ee7599eeb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1168,7 +1168,8 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), + flags_to_mode(filp->f_flags), filp); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 9eb181287879..2563ed8580f3 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -32,7 +32,6 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; - fmode_t f_mode; struct iattr attr; int err; @@ -51,17 +50,14 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (err) return err; - f_mode = filp->f_mode; - if ((openflags & O_ACCMODE) == 3) - f_mode |= flags_to_mode(openflags); - /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), + flags_to_mode(openflags), filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; @@ -366,8 +362,8 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out_free_name; } - ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode, - filep); + ctx = alloc_nfs_open_context(filep->f_path.dentry, + flags_to_mode(filep->f_flags), filep); if (IS_ERR(ctx)) { res = ERR_CAST(ctx); goto out_filep; -- cgit v1.2.3 From d564d2c4c2445cb0972453933dc87c2dcaac8597 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Fri, 23 Sep 2022 13:40:15 +0800 Subject: NFSv4: check FMODE_EXEC from open context mode in nfs4_opendata_access() After converting file f_flags to open context mode by flags_to_mode(), open context mode will have FMODE_EXEC when file open for exec, so we check FMODE_EXEC from open context mode. No functional change, just simplify the code. Signed-off-by: ChenXiaoSong Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e51044a5f550..44f1dcb6020f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2630,8 +2630,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) */ static int nfs4_opendata_access(const struct cred *cred, struct nfs4_opendata *opendata, - struct nfs4_state *state, fmode_t fmode, - int openflags) + struct nfs4_state *state, fmode_t fmode) { struct nfs_access_entry cache; u32 mask, flags; @@ -2642,11 +2641,7 @@ static int nfs4_opendata_access(const struct cred *cred, return 0; mask = 0; - /* - * Use openflags to check for exec, because fmode won't - * always have FMODE_EXEC set when file open for exec. - */ - if (openflags & __FMODE_EXEC) { + if (fmode & FMODE_EXEC) { /* ONLY check for exec rights */ if (S_ISDIR(state->inode->i_mode)) mask = NFS4_ACCESS_LOOKUP; @@ -3034,7 +3029,7 @@ static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, } static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, - int flags, struct nfs_open_context *ctx) + struct nfs_open_context *ctx) { struct nfs4_state_owner *sp = opendata->owner; struct nfs_server *server = sp->so_server; @@ -3095,8 +3090,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, /* Parse layoutget results before we check for access */ pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); - ret = nfs4_opendata_access(sp->so_cred, opendata, state, - acc_mode, flags); + ret = nfs4_opendata_access(sp->so_cred, opendata, state, acc_mode); if (ret != 0) goto out; @@ -3170,7 +3164,7 @@ static int _nfs4_do_open(struct inode *dir, if (d_really_is_positive(dentry)) opendata->state = nfs4_get_open_state(d_inode(dentry), sp); - status = _nfs4_open_and_get_state(opendata, flags, ctx); + status = _nfs4_open_and_get_state(opendata, ctx); if (status != 0) goto err_opendata_put; state = ctx->state; -- cgit v1.2.3 From 5559405df652008e56eee88872126fe4c451da67 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Mon, 24 Oct 2022 00:39:45 +0800 Subject: nfs: fix possible null-ptr-deref when parsing param According to commit "vfs: parse: deal with zero length string value", kernel will set the param->string to null pointer in vfs_parse_fs_string() if fs string has zero length. Yet the problem is that, nfs_fs_context_parse_param() will dereferences the param->string, without checking whether it is a null pointer, which may trigger a null-ptr-deref bug. This patch solves it by adding sanity check on param->string in nfs_fs_context_parse_param(). Signed-off-by: Hawkins Jiawei Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 09833ec102fc..9bcd53d5c7d4 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -684,6 +684,8 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, return ret; break; case Opt_vers: + if (!param->string) + goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); ret = nfs_parse_version_string(fc, param->string); if (ret < 0) @@ -696,6 +698,8 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_proto: + if (!param->string) + goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); protofamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { @@ -732,6 +736,8 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_mountproto: + if (!param->string) + goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); mountfamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { -- cgit v1.2.3 From ef8d98f20dfc79777d2b66a30926533225dc6efa Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Oct 2022 10:26:51 +1100 Subject: NFS: avoid spurious warning of lost lock that is being unlocked. When the NFSv4 state manager recovers state after a server restart, it reports that locks have been lost if it finds any lock state for which recovery hasn't been successful. i.e. any for which NFS_LOCK_INITIALIZED is not set. However it only tries to recover locks that are still linked to inode->i_flctx. So if a lock has been removed from inode->i_flctx, but the state for that lock has not yet been destroyed, then a spurious warning results. nfs4_proc_unlck() calls locks_lock_inode_wait() - which removes the lock from ->i_flctx - before sending the unlock request to the server and before the final nfs4_put_lock_state() is called. This allows a window in which a spurious warning can be produced. So add a new flag NFS_LOCK_UNLOCKING which is set once the decision has been made to unlock the lock. This will prevent it from triggering any warning. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4state.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cfef738d765e..5edd1704f735 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -149,6 +149,7 @@ struct nfs4_lock_state { struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 0 #define NFS_LOCK_LOST 1 +#define NFS_LOCK_UNLOCKING 2 unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44f1dcb6020f..40d749f29ed3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7023,12 +7023,13 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_unlock(&sp->so_delegreturn_mutex); goto out; } + lsp = request->fl_u.nfs4_fl.owner; + set_bit(NFS_LOCK_UNLOCKING, &lsp->ls_flags); up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); if (status != 0) goto out; /* Is this a delegated lock? */ - lsp = request->fl_u.nfs4_fl.owner; if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) goto out; alloc_seqid = NFS_SERVER(inode)->nfs_client->cl_mvops->alloc_seqid; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a2d2d5d1b088..7c1f43507813 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1619,7 +1619,8 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { trace_nfs4_state_lock_reclaim(state, lock); - if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) + if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags) && + !test_bit(NFS_LOCK_UNLOCKING, &lock->ls_flags)) *lost_locks += 1; } spin_unlock(&state->state_lock); -- cgit v1.2.3 From 36357fe74ef736524a29fbd3952948768510a8b9 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 30 Nov 2022 13:15:25 -0500 Subject: NFSv4.2: Set the correct size scratch buffer for decoding READ_PLUS The scratch_buf array is 16 bytes, but I was passing 32 to the xdr_set_scratch_buffer() function. Fix this by using sizeof(), which is what I probably should have been doing this whole time. Fixes: d3b00a802c84 ("NFS: Replace the READ_PLUS decoding code") Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index fe1aeb0f048f..2fd465cab631 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1142,7 +1142,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (!segs) return -ENOMEM; - xdr_set_scratch_buffer(xdr, &scratch_buf, 32); + xdr_set_scratch_buffer(xdr, &scratch_buf, sizeof(scratch_buf)); status = -EIO; for (i = 0; i < segments; i++) { status = decode_read_plus_segment(xdr, &segs[i]); -- cgit v1.2.3 From f8527028a7e52da884055c401abc04e0b0c84285 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 30 Nov 2022 13:15:26 -0500 Subject: NFSv4.2: Fix up READ_PLUS alignment Assume that the first segment will be a DATA segment, and place the data directly into the xdr pages so it doesn't need to be shifted. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 2fd465cab631..d80ee88ca996 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -47,13 +47,14 @@ #define decode_deallocate_maxsz (op_decode_hdr_maxsz) #define encode_read_plus_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) -#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \ +#define NFS42_READ_PLUS_DATA_SEGMENT_SIZE \ + (1 /* data_content4 */ + \ 2 /* data_info4.di_offset */ + \ - 2 /* data_info4.di_length */) + 1 /* data_info4.di_length */) #define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ 1 /* rpr_eof */ + \ 1 /* rpr_contents count */ + \ - 2 * NFS42_READ_PLUS_SEGMENT_SIZE) + NFS42_READ_PLUS_DATA_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ -- cgit v1.2.3 From a60214c2465493aac0b014d87ee19327b6204c42 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 30 Nov 2022 15:30:47 -0500 Subject: NFS: Allow very small rsize & wsize again 940261a19508 introduced nfs_io_size() to clamp the iosize to a multiple of PAGE_SIZE. This had the unintended side effect of no longer allowing iosizes less than a page, which could be useful in some situations. UDP already has an exception that causes it to fall back on the power-of-two style sizes instead. This patch adds an additional exception for very small iosizes. Reported-by: Jeff Layton Fixes: 940261a19508 ("NFS: Allow setting rsize / wsize to a multiple of PAGE_SIZE") Signed-off-by: Anna Schumaker Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 647fc3f547cb..ae7d4a8c728c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -739,12 +739,10 @@ unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto) iosize = NFS_DEF_FILE_IO_SIZE; else if (iosize >= NFS_MAX_FILE_IO_SIZE) iosize = NFS_MAX_FILE_IO_SIZE; - else - iosize = iosize & PAGE_MASK; - if (proto == XPRT_TRANSPORT_UDP) + if (proto == XPRT_TRANSPORT_UDP || iosize < PAGE_SIZE) return nfs_block_bits(iosize, NULL); - return iosize; + return iosize & PAGE_MASK; } /* -- cgit v1.2.3 From 700fa9b1b361760ca6c28e2e7d6cccbdc9fb6716 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Mon, 5 Dec 2022 17:13:45 +0800 Subject: NFS: use sysfs_emit() to instead of scnprintf() Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Signed-off-by: ye xingchen Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 88a23af2bd5c..b0ef7e7ddb30 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -354,7 +354,7 @@ static int param_get_nfs_timeout(char *buffer, const struct kernel_param *kp) num = (num + (HZ - 1)) / HZ; } else num = -1; - return scnprintf(buffer, PAGE_SIZE, "%li\n", num); + return sysfs_emit(buffer, "%li\n", num); } static const struct kernel_param_ops param_ops_nfs_timeout = { -- cgit v1.2.3 From 19cdc8fa5b9b768f5753eefe3da32fcbafcb0d18 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Mon, 5 Dec 2022 17:19:16 +0800 Subject: fs: nfs: sysfs: use sysfs_emit() to instead of scnprintf() Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Signed-off-by: ye xingchen Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index a6f740366963..26f8ece2a997 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -82,7 +82,7 @@ static ssize_t nfs_netns_identifier_show(struct kobject *kobj, ssize_t ret; rcu_read_lock(); - ret = scnprintf(buf, PAGE_SIZE, "%s\n", rcu_dereference(c->identifier)); + ret = sysfs_emit(buf, "%s\n", rcu_dereference(c->identifier)); rcu_read_unlock(); return ret; } -- cgit v1.2.3 From b4e4f66901658fae0614dea5bf91062a5387eda7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 6 Dec 2022 12:42:59 -0500 Subject: NFSv4.x: Fail client initialisation if state manager thread can't run If the state manager thread fails to start, then we should just mark the client initialisation as failed so that other processes or threads don't get stuck in nfs_wait_client_init_complete(). Reported-by: ChenXiaoSong Fixes: 4697bd5e9419 ("NFSv4: Fix a race in the net namespace mount notification") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7c1f43507813..5720196141e1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1230,6 +1230,8 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); + if (!nfs_client_init_is_complete(clp)) + nfs_mark_client_ready(clp, PTR_ERR(task)); nfs4_clear_state_manager_bit(clp); clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs_put_client(clp); -- cgit v1.2.3 From 7fd461c47c6cfab4ca4d003790ec276209e52978 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 30 Nov 2022 13:15:27 -0500 Subject: NFSv4.2: Change the default KConfig value for READ_PLUS Now that we've worked out performance issues and have a server patch addressing the failed xfstests, we can safely enable this feature by default. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 14a72224b657..1ead5bd740c2 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT config NFS_V4_2_READ_PLUS bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" depends on NFS_V4_2 - default n + default y help - This is intended for developers only. The READ_PLUS operation has - been shown to have issues under specific conditions and should not - be used in production. + Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS + attempts to improve read performance by compressing out sparse holes + in the file contents. -- cgit v1.2.3