From 03f318ca652889a1aa407e7088b9a2f6a14ae374 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Jun 2018 12:28:47 -0400 Subject: nfsd4: extend reclaim period for reclaiming clients If the client is only renewing state a little sooner than once a lease period, then it might not discover the server has restarted till close to the end of the grace period, and might run out of time to do the actual reclaim. Extend the grace period by a second each time we notice there are clients still trying to reclaim, up to a limit of another whole lease period. Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 1 + fs/nfsd/nfs4proc.c | 4 ++++ fs/nfsd/nfs4state.c | 31 ++++++++++++++++++++++++++++++- fs/nfsd/nfsctl.c | 1 + 4 files changed, 36 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 36358d435cb0..426f55005697 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -102,6 +102,7 @@ struct nfsd_net { time_t nfsd4_lease; time_t nfsd4_grace; + bool somebody_reclaimed; bool nfsd_net_up; bool lockd_up; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5d99e8810b85..1929f85b8269 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -354,6 +354,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *resfh = NULL; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + bool reclaim = false; dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, @@ -424,6 +425,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + reclaim = true; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, cstate, open); @@ -452,6 +454,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, WARN(status && open->op_created, "nfsd4_process_open2 failed to open newly-created file! status=%u\n", be32_to_cpu(status)); + if (reclaim && !status) + nn->somebody_reclaimed = true; out: if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 857141446d6b..9b2ce80abee0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4693,6 +4693,28 @@ nfsd4_end_grace(struct nfsd_net *nn) */ } +/* + * If we've waited a lease period but there are still clients trying to + * reclaim, wait a little longer to give them a chance to finish. + */ +static bool clients_still_reclaiming(struct nfsd_net *nn) +{ + unsigned long now = get_seconds(); + unsigned long double_grace_period_end = nn->boot_time + + 2 * nn->nfsd4_lease; + + if (!nn->somebody_reclaimed) + return false; + nn->somebody_reclaimed = false; + /* + * If we've given them *two* lease times to reclaim, and they're + * still not done, give up: + */ + if (time_after(now, double_grace_period_end)) + return false; + return true; +} + static time_t nfs4_laundromat(struct nfsd_net *nn) { @@ -4706,6 +4728,11 @@ nfs4_laundromat(struct nfsd_net *nn) time_t t, new_timeo = nn->nfsd4_lease; dprintk("NFSD: laundromat service - starting\n"); + + if (clients_still_reclaiming(nn)) { + new_timeo = 0; + goto out; + } nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); spin_lock(&nn->client_lock); @@ -4803,7 +4830,7 @@ nfs4_laundromat(struct nfsd_net *nn) posix_unblock_lock(&nbl->nbl_lock); free_blocked_lock(nbl); } - +out: new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); return new_timeo; } @@ -6053,6 +6080,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case 0: /* success! */ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; + if (lock->lk_reclaim) + nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: nbl = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d107b4426f7e..5f22476cf371 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1239,6 +1239,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; + nn->somebody_reclaimed = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); -- cgit v1.2.3 From d6ebf5088f09472c1136cd506bdc27034a6763f8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Jun 2018 12:33:17 -0400 Subject: nfsd4: return default lease period I don't have a good rationale for the lease period, but 90 seconds seems long, and as long as we're allowing the server to extend the grace period up to double the lease period, let's half the default to 45. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 5f22476cf371..ec8a214a5089 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1237,8 +1237,8 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; + nn->nfsd4_lease = 45; /* default lease time */ + nn->nfsd4_grace = 45; nn->somebody_reclaimed = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); -- cgit v1.2.3 From 16945141c3567bb8561de3677de1ec657675bb15 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 25 Apr 2018 14:34:11 -0400 Subject: nfsd: fix NFSv4 time_delta attribute Currently we return the worst-case value of 1 second in the time delta attribute. That's not terribly useful. Instead, return a value calculated from the time granularity supported by the filesystem and the system clock. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a96843c59fc1..4161031ae14e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2006,6 +2006,31 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +/* + * ctime (in NFSv4, time_metadata) is not writeable, and the client + * doesn't really care what resolution could theoretically be stored by + * the filesystem. + * + * The client cares how close together changes can be while still + * guaranteeing ctime changes. For most filesystems (which have + * timestamps with nanosecond fields) that is limited by the resolution + * of the time returned from current_time() (which I'm assuming to be + * 1/HZ). + */ +static __be32 *encode_time_delta(__be32 *p, struct inode *inode) +{ + struct timespec ts; + u32 ns; + + ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); + ts = ns_to_timespec(ns); + + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); + + return p; +} + static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { *p++ = cpu_to_be32(c->atomic); @@ -2797,9 +2822,7 @@ out_acl: p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(1); - *p++ = cpu_to_be32(0); + p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { p = xdr_reserve_space(xdr, 12); -- cgit v1.2.3 From a85857633b04d57f4524cca0a2bfaf87b2543f9f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 25 Apr 2018 13:26:23 -0400 Subject: nfsd4: support change_attr_type attribute The change attribute is what is used by clients to revalidate their caches. Our server may use i_version or ctime for that purpose. Those choices behave slightly differently, and it may be useful to the client to know which we're using. This attribute tells the client that. The Linux client doesn't yet use this attribute yet, though. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 10 ++++++++++ fs/nfsd/nfsd.h | 1 + include/linux/nfs4.h | 8 ++++++++ 3 files changed, 19 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4161031ae14e..fb4991889f89 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2891,6 +2891,16 @@ out_acl: goto out; } + if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) { + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + if (IS_I_VERSION(d_inode(dentry))) + *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR); + else + *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); + } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, contextlen); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 3fce905d0365..066899929863 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -360,6 +360,7 @@ void nfsd_lockd_shutdown(void); #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + FATTR4_WORD2_CHANGE_ATTR_TYPE | \ FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 57ffaa20d564..0877ed333733 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -374,6 +374,13 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; +enum change_attr_type4 { + NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, + NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, + NFS4_CHANGE_TYPE_IS_UNDEFINED = 4 +}; /* Mandatory Attributes */ #define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) @@ -441,6 +448,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) -- cgit v1.2.3 From 665d507276004823e07596997d49d2705b4598a3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 11 Apr 2018 16:53:36 -0400 Subject: nfsd4: less confusing nfsd4_compound_in_session Make the function prototype match the name a little better. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9b2ce80abee0..cfd0ac5c1c5b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2956,11 +2956,11 @@ out_no_session: return status; } -static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) +static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate, struct nfs4_sessionid *sid) { - if (!session) + if (!cstate->session) return false; - return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); + return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid)); } __be32 @@ -2975,7 +2975,7 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, struct nfsd_net *nn = net_generic(net, nfsd_net_id); status = nfserr_not_only_op; - if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { + if (nfsd4_compound_in_session(cstate, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; ref_held_by_me++; -- cgit v1.2.3 From ca0552f4641436a1dd5c49906690ee39ab5d43a3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 11 Apr 2018 17:01:53 -0400 Subject: nfsd4: cleanup sessionid in nfsd4_destroy_session The name of this variable doesn't fit the type. And we only ever use one field of it. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cfd0ac5c1c5b..4f13dcb42ab1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2967,7 +2967,7 @@ __be32 nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - struct nfsd4_destroy_session *sessionid = &u->destroy_session; + struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid; struct nfsd4_session *ses; __be32 status; int ref_held_by_me = 0; @@ -2975,14 +2975,14 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, struct nfsd_net *nn = net_generic(net, nfsd_net_id); status = nfserr_not_only_op; - if (nfsd4_compound_in_session(cstate, &sessionid->sessionid)) { + if (nfsd4_compound_in_session(cstate, sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; ref_held_by_me++; } - dump_sessionid(__func__, &sessionid->sessionid); + dump_sessionid(__func__, sessionid); spin_lock(&nn->client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status); + ses = find_in_sessionid_hashtbl(sessionid, net, &status); if (!ses) goto out_client_lock; status = nfserr_wrong_cred; -- cgit v1.2.3 From 4a269efb6f8199b58aa77a1a40404877974feb26 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 13 Apr 2018 17:34:35 -0400 Subject: nfsd: update obselete comment referencing the BKL It's inode->i_lock that's now taken in setlease and break_lease, instead of the big kernel lock. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4f13dcb42ab1..19c4d29917ee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3945,9 +3945,9 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease - * callback (and since the lease code is serialized by the kernel - * lock) we know the server hasn't removed the lease yet, we know - * it's safe to take a reference. + * callback (and since the lease code is serialized by the + * i_lock) we know the server hasn't removed the lease yet, and + * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&dp->dl_recall); -- cgit v1.2.3 From 7a04cfda7dfa00173ab165577254ede83f81bf01 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 13 Jun 2018 15:04:59 -0400 Subject: nfsd: clarify check_op_ordering Document a couple things that confused me on a recent reading. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 1929f85b8269..140b05c8a4be 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1603,7 +1603,7 @@ static const char *nfsd4_op_name(unsigned opnum); */ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) { - struct nfsd4_op *op = &args->ops[0]; + struct nfsd4_op *first_op = &args->ops[0]; /* These ordering requirements don't apply to NFSv4.0: */ if (args->minorversion == 0) @@ -1611,12 +1611,17 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) /* This is weird, but OK, not our problem: */ if (args->opcnt == 0) return nfs_ok; - if (op->status == nfserr_op_illegal) + if (first_op->status == nfserr_op_illegal) return nfs_ok; - if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) + if (!(nfsd4_ops[first_op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) return nfserr_op_not_in_session; - if (op->opnum == OP_SEQUENCE) + if (first_op->opnum == OP_SEQUENCE) return nfs_ok; + /* + * So first_op is something allowed outside a session, like + * EXCHANGE_ID; but then it has to be the only op in the + * compound: + */ if (args->opcnt != 1) return nfserr_not_only_op; return nfs_ok; -- cgit v1.2.3 From 5b7b15aee641904ae269be9846610a3950cbd64c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 13 Jun 2018 15:21:35 -0400 Subject: nfsd: fix corrupted reply to badly ordered compound We're encoding a single op in the reply but leaving the number of ops zero, so the reply makes no sense. Somewhat academic as this isn't a case any real client will hit, though in theory perhaps that could change in a future protocol extension. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 140b05c8a4be..3652f9b1fb68 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1735,6 +1735,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (status) { op = &args->ops[0]; op->status = status; + resp->opcnt = 1; goto encode_op; } -- cgit v1.2.3 From 8163496e78db100a6b5cfbdaece385686ae50129 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 19 Jun 2018 04:57:24 -0400 Subject: nfsd: don't advertise a SCSI layout for an unsupported request_queue Commit 30181faae37f ("nfsd: Check queue type before submitting a SCSI request") did the work of ensuring that we don't send SCSI requests to a request queue that won't support them, but that check is in the GETDEVICEINFO path. Let's not set the SCSI layout in fs_layout_type in the first place, and then we'll have less clients sending GETDEVICEINFO for non-SCSI request queues and less unnecessary WARN_ONs. While we're in here, remove some outdated comments that refer to "overwriting" layout seletion because commit 8a4c3926889e ("nfsd: allow nfsd to advertise multiple layout types") changed things to no longer overwrite the layout type. Signed-off-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4layouts.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 228faf00a594..2b36aa037ce0 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -133,27 +133,20 @@ void nfsd4_setup_layout_type(struct svc_export *exp) if (!(exp->ex_flags & NFSEXP_PNFS)) return; - /* - * If flex file is configured, use it by default. Otherwise - * check if the file system supports exporting a block-like layout. - * If the block device supports reservations prefer the SCSI layout, - * otherwise advertise the block layout. - */ #ifdef CONFIG_NFSD_FLEXFILELAYOUT exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES; #endif #ifdef CONFIG_NFSD_BLOCKLAYOUT - /* overwrite flex file layout selection if needed */ if (sb->s_export_op->get_uuid && sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks) exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME; #endif #ifdef CONFIG_NFSD_SCSILAYOUT - /* overwrite block layout selection if needed */ if (sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks && - sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) + sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops && + blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue)) exp->ex_layout_types |= 1 << LAYOUT_SCSI; #endif } -- cgit v1.2.3 From 64bed6cbe38bc95689fb9399872d9ce250192f90 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 13 Jul 2018 17:22:24 +0300 Subject: nfsd: fix leaked file lock with nfs exported overlayfs nfsd and lockd call vfs_lock_file() to lock/unlock the inode returned by locks_inode(file). Many places in nfsd/lockd code use the inode returned by file_inode(file) for lock manipulation. With Overlayfs, file_inode() (the underlying inode) is not the same object as locks_inode() (the overlay inode). This can result in "Leaked POSIX lock" messages and eventually to a kernel crash as reported by Eddie Horng: https://marc.info/?l=linux-unionfs&m=153086643202072&w=2 Fix all the call sites in nfsd/lockd that should use locks_inode(). This is a correctness bug that manifested when overlayfs gained NFS export support in v4.16. Reported-by: Eddie Horng Tested-by: Eddie Horng Cc: Jeff Layton Fixes: 8383f1748829 ("ovl: wire up NFS export operations") Cc: stable@vger.kernel.org Signed-off-by: Amir Goldstein Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/clntproc.c | 2 +- fs/lockd/svclock.c | 16 ++++++++-------- fs/lockd/svcsubs.c | 4 ++-- fs/nfsd/nfs4state.c | 2 +- include/linux/lockd/lockd.h | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 96c1d14c18f1..c2a128678e6e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -187,7 +187,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) continue; if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; - if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0) + if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0) continue; /* Alright, we found a lock. Set the return status * and wake up the caller diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a2c0dfc6fdc0..d20b92f271c2 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -128,7 +128,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); - memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); + memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh)); lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 3701bccab478..74330daeab71 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -405,8 +405,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_type, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, @@ -511,8 +511,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 ret; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -566,8 +566,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) int error; dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -595,8 +595,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l int status = 0; dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", - file_inode(file->f_file)->i_sb->s_id, - file_inode(file->f_file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 4ec3d6e03e76..899360ba3b84 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -44,7 +44,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) { - struct inode *inode = file_inode(file->f_file); + struct inode *inode = locks_inode(file->f_file); dprintk("lockd: %s %s/%ld\n", msg, inode->i_sb->s_id, inode->i_ino); @@ -414,7 +414,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file) { struct super_block *sb = datap; - return sb == file_inode(file->f_file)->i_sb; + return sb == locks_inode(file->f_file)->i_sb; } /** diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 19c4d29917ee..c8e4a8f42bbe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6322,7 +6322,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) return status; } - inode = file_inode(filp); + inode = locks_inode(filp); flctx = inode->i_flctx; if (flctx && !list_empty_careful(&flctx->flc_posix)) { diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 4fd95dbeb52f..b065ef406770 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -299,7 +299,7 @@ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return file_inode(file->f_file); + return locks_inode(file->f_file); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) @@ -359,7 +359,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file) && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start -- cgit v1.2.3 From 7b4d6da4bb6d0ece17ca27f0b4612df87f873d83 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 31 Jul 2018 21:24:10 -0500 Subject: nfsd: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 2 was used: -Wimplicit-fallthrough=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1f04d2a70d25..144be2dc18d7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -980,6 +980,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback break; case -ESERVERFAULT: ++session->se_cb_seq_nr; + /* Fall through */ case 1: case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); -- cgit v1.2.3 From 3fd9557aec919e2db99365ad5a2c00d04ae8893c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 27 Jul 2018 11:19:05 -0400 Subject: NFSD: Refactor the generic write vector fill helper fill_in_write_vector() is nearly the same logic as svc_fill_write_vector(), but there are a few differences so that the former can handle multiple WRITE payloads in a single COMPOUND. svc_fill_write_vector() can be adjusted so that it can be used in the NFSv4 WRITE code path too. Instead of assuming the pages are coming from rq_args.pages, have the caller pass in the page list. The immediate benefit is a reduction of code duplication. It also prevents the NFSv4 WRITE decoder from passing an empty vector element when the transport has provided the payload in the xdr_buf's page array. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 3 ++- fs/nfsd/nfs4proc.c | 23 ++++------------------- fs/nfsd/nfsproc.c | 3 ++- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 11 ++++------- 5 files changed, 13 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 6259a4b8579f..8d1c2d1a159b 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -202,7 +202,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, + &argp->first, cnt); if (!nvecs) RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3652f9b1fb68..b7bc6e1a85ac 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -986,24 +986,6 @@ out: return status; } -static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) -{ - int i = 1; - int buflen = write->wr_buflen; - - vec[0].iov_base = write->wr_head.iov_base; - vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len); - buflen -= vec[0].iov_len; - - while (buflen) { - vec[i].iov_base = page_address(write->wr_pagelist[i - 1]); - vec[i].iov_len = min_t(int, PAGE_SIZE, buflen); - buflen -= vec[i].iov_len; - i++; - } - return i; -} - static __be32 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1031,7 +1013,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); - nvecs = fill_in_write_vector(rqstp->rq_vec, write); + nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, + &write->wr_head, write->wr_buflen); + if (!nvecs) + return nfserr_io; WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index f107f9fa8e15..a6faee562b31 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -218,7 +218,8 @@ nfsd_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, + &argp->first, cnt); if (!nvecs) return nfserr_io; nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 574368e8a16f..43f88bd7b601 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -496,6 +496,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct page **pages, struct kvec *first, size_t total); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, size_t total); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 30a4226baf03..2194ed507991 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1537,16 +1537,16 @@ EXPORT_SYMBOL_GPL(svc_max_payload); /** * svc_fill_write_vector - Construct data argument for VFS write call * @rqstp: svc_rqst to operate on + * @pages: list of pages containing data payload * @first: buffer containing first section of write payload * @total: total number of bytes of write payload * - * Returns the number of elements populated in the data argument array. + * Fills in rqstp::rq_vec, and returns the number of elements. */ -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, - size_t total) +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, + struct kvec *first, size_t total) { struct kvec *vec = rqstp->rq_vec; - struct page **pages; unsigned int i; /* Some types of transport can present the write payload @@ -1560,14 +1560,11 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, ++i; } - WARN_ON_ONCE(rqstp->rq_arg.page_base != 0); - pages = rqstp->rq_arg.pages; while (total) { vec[i].iov_base = page_address(*pages); vec[i].iov_len = min_t(size_t, total, PAGE_SIZE); total -= vec[i].iov_len; ++i; - ++pages; } -- cgit v1.2.3 From 11b4d66ea3313d9b03a83b80458ddee64990e3c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 27 Jul 2018 11:19:10 -0400 Subject: NFSD: Handle full-length symlinks I've given up on the idea of zero-copy handling of SYMLINK on the server side. This is because the Linux VFS symlink API requires the symlink pathname to be in a NUL-terminated kmalloc'd buffer. The NUL-termination is going to be problematic (watching out for landing on a page boundary and dealing with a 4096-byte pathname). I don't believe that SYMLINK creation is on a performance path or is requested frequently enough that it will cause noticeable CPU cache pollution due to data copies. There will be two places where a transport callout will be necessary to fill in the rqstp: one will be in the svc_fill_symlink_pathname() helper that is used by NFSv2 and NFSv3, and the other will be in nfsd4_decode_create(). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 2 ++ fs/nfsd/nfsproc.c | 2 ++ include/linux/sunrpc/svc.h | 3 ++- net/sunrpc/svc.c | 67 +++++++++++++++++----------------------------- 4 files changed, 31 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8d1c2d1a159b..9eb8086ea841 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -290,6 +290,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_nametoolong); argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + page_address(rqstp->rq_arg.pages[0]), argp->tlen); if (IS_ERR(argp->tname)) RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); @@ -303,6 +304,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, argp->tname, &resp->fh); + kfree(argp->tname); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a6faee562b31..0d20fd161225 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -454,6 +454,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) return nfserr_nametoolong; argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, + page_address(rqstp->rq_arg.pages[0]), argp->tlen); if (IS_ERR(argp->tname)) return nfserrno(PTR_ERR(argp->tname)); @@ -466,6 +467,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &newfh); + kfree(argp->tname); fh_put(&argp->ffh); fh_put(&newfh); return nfserr; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 43f88bd7b601..73e130a840ce 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -499,7 +499,8 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, struct kvec *first, size_t total); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, - struct kvec *first, size_t total); + struct kvec *first, void *p, + size_t total); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2194ed507991..d13e05f1a990 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1577,65 +1577,48 @@ EXPORT_SYMBOL_GPL(svc_fill_write_vector); * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call * @rqstp: svc_rqst to operate on * @first: buffer containing first section of pathname + * @p: buffer containing remaining section of pathname * @total: total length of the pathname argument * - * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is - * released automatically when @rqstp is recycled. + * The VFS symlink API demands a NUL-terminated pathname in mapped memory. + * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free + * the returned string. */ char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, - size_t total) + void *p, size_t total) { - struct xdr_buf *arg = &rqstp->rq_arg; - struct page **pages; - char *result; - - /* VFS API demands a NUL-terminated pathname. This function - * uses a page from @rqstp as the pathname buffer, to enable - * direct placement. Thus the total buffer size is PAGE_SIZE. - * Space in this buffer for NUL-termination requires that we - * cap the size of the returned symlink pathname just a - * little early. - */ - if (total > PAGE_SIZE - 1) - return ERR_PTR(-ENAMETOOLONG); + size_t len, remaining; + char *result, *dst; - /* Some types of transport can present the pathname entirely - * in rq_arg.pages. If not, then copy the pathname into one - * page. - */ - pages = arg->pages; - WARN_ON_ONCE(arg->page_base != 0); - if (first->iov_base == 0) { - result = page_address(*pages); - result[total] = '\0'; - } else { - size_t len, remaining; - char *dst; + result = kmalloc(total + 1, GFP_KERNEL); + if (!result) + return ERR_PTR(-ESERVERFAULT); - result = page_address(*(rqstp->rq_next_page++)); - dst = result; - remaining = total; + dst = result; + remaining = total; - len = min_t(size_t, total, first->iov_len); + len = min_t(size_t, total, first->iov_len); + if (len) { memcpy(dst, first->iov_base, len); dst += len; remaining -= len; + } - /* No more than one page left */ - if (remaining) { - len = min_t(size_t, remaining, PAGE_SIZE); - memcpy(dst, page_address(*pages), len); - dst += len; - } - - *dst = '\0'; + if (remaining) { + len = min_t(size_t, remaining, PAGE_SIZE); + memcpy(dst, p, len); + dst += len; } - /* Sanity check: we don't allow the pathname argument to + *dst = '\0'; + + /* Sanity check: Linux doesn't allow the pathname argument to * contain a NUL byte. */ - if (strlen(result) != total) + if (strlen(result) != total) { + kfree(result); return ERR_PTR(-EINVAL); + } return result; } EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); -- cgit v1.2.3 From 5ed96bc5451bb736a0b7d5d72a0f444316dc6559 Mon Sep 17 00:00:00 2001 From: nixiaoming Date: Mon, 23 Jul 2018 09:57:11 +0800 Subject: fs/nfsd: Delete invalid assignment statements in nfsd4_decode_exchange_id READ_BUF(8); dummy = be32_to_cpup(p++); dummy = be32_to_cpup(p++); ... READ_BUF(4); dummy = be32_to_cpup(p++); Assigning value to "dummy" here, but that stored value is overwritten before it can be used. At the same time READ_BUF() will re-update the pointer p. delete invalid assignment statements Signed-off-by: nixiaoming Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fb4991889f89..418fa9c78186 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1390,10 +1390,8 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, p += XDR_QUADLEN(dummy); } - /* ssp_window and ssp_num_gss_handles */ + /* ignore ssp_window and ssp_num_gss_handles: */ READ_BUF(8); - dummy = be32_to_cpup(p++); - dummy = be32_to_cpup(p++); break; default: goto xdr_error; -- cgit v1.2.3 From c2cdc2ab24873289db8588a0a7ccec508d7ad075 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Jul 2018 11:01:25 -0700 Subject: nfsd: constify write_op[] write_op[] is never modified, so make it 'const'. Signed-off-by: Eric Biggers Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ec8a214a5089..7fb9f7c667b1 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -73,7 +73,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); #endif -static ssize_t (*write_op[])(struct file *, char *, size_t) = { +static ssize_t (*const write_op[])(struct file *, char *, size_t) = { [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, -- cgit v1.2.3 From a677a78325b3057ce03b6129f549ece85b0c8dd2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 1 Aug 2018 19:44:05 -0500 Subject: nfsd: use true and false for boolean values Return statements in functions returning bool should use true or false instead of an integer value. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index a008e7634181..b319080288c3 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -451,7 +451,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) switch (fsid_type) { case FSID_DEV: if (!old_valid_dev(exp_sb(exp)->s_dev)) - return 0; + return false; /* FALL THROUGH */ case FSID_MAJOR_MINOR: case FSID_ENCODE_DEV: @@ -461,13 +461,13 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) case FSID_UUID8: case FSID_UUID16: if (!is_root_export(exp)) - return 0; + return false; /* fall through */ case FSID_UUID4_INUM: case FSID_UUID16_INUM: return exp->ex_uuid != NULL; } - return 1; + return true; } -- cgit v1.2.3 From 9abdda5ddab8a899ca8c4b859ef0a7710f40e0dd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Aug 2018 12:05:59 -0400 Subject: sunrpc: Extract target name into svc_cred NFSv4.0 callback needs to know the GSS target name the client used when it established its lease. That information is available from the GSS context created by gssproxy. Make it available in each svc_cred. Note this will also give us access to the real target service principal name (which is typically "nfs", but spec does not require that). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 ++-- include/linux/sunrpc/svcauth.h | 3 ++ net/sunrpc/auth_gss/gss_rpc_upcall.c | 70 +++++++++++++++++++++++------------- 3 files changed, 53 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c8e4a8f42bbe..3322fe35a8fe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1979,8 +1979,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL); target->cr_raw_principal = kstrdup(source->cr_raw_principal, GFP_KERNEL); - if ((source->cr_principal && ! target->cr_principal) || - (source->cr_raw_principal && ! target->cr_raw_principal)) + target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL); + if ((source->cr_principal && !target->cr_principal) || + (source->cr_raw_principal && !target->cr_raw_principal) || + (source->cr_targ_princ && !target->cr_targ_princ)) return -ENOMEM; target->cr_flavor = source->cr_flavor; @@ -2057,6 +2059,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) || (!gid_eq(cr1->cr_gid, cr2->cr_gid)) || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) return false; + /* XXX: check that cr_targ_princ fields match ? */ if (cr1->cr_principal == cr2->cr_principal) return true; if (!cr1->cr_principal || !cr2->cr_principal) diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 7c3656505847..04e404a07882 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -31,6 +31,7 @@ struct svc_cred { /* name of form servicetype@hostname, passed down by * rpc.svcgssd, or computed from the above: */ char *cr_principal; + char *cr_targ_princ; struct gss_api_mech *cr_gss_mech; }; @@ -39,6 +40,7 @@ static inline void init_svc_cred(struct svc_cred *cred) cred->cr_group_info = NULL; cred->cr_raw_principal = NULL; cred->cr_principal = NULL; + cred->cr_targ_princ = NULL; cred->cr_gss_mech = NULL; } @@ -48,6 +50,7 @@ static inline void free_svc_cred(struct svc_cred *cred) put_group_info(cred->cr_group_info); kfree(cred->cr_raw_principal); kfree(cred->cr_principal); + kfree(cred->cr_targ_princ); gss_mech_put(cred->cr_gss_mech); init_svc_cred(cred); } diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 1c7c49dbf8ba..73dcda060335 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -234,6 +234,35 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) return 0; } +static char *gssp_stringify(struct xdr_netobj *netobj) +{ + return kstrndup(netobj->data, netobj->len, GFP_KERNEL); +} + +static void gssp_hostbased_service(char **principal) +{ + char *c; + + if (!*principal) + return; + + /* terminate and remove realm part */ + c = strchr(*principal, '@'); + if (c) { + *c = '\0'; + + /* change service-hostname delimiter */ + c = strchr(*principal, '/'); + if (c) + *c = '@'; + } + if (!c) { + /* not a service principal */ + kfree(*principal); + *principal = NULL; + } +} + /* * Public functions */ @@ -262,6 +291,7 @@ int gssp_accept_sec_context_upcall(struct net *net, */ .exported_context_token.len = GSSX_max_output_handle_sz, .mech.len = GSS_OID_MAX_LEN, + .targ_name.display_name.len = GSSX_max_princ_sz, .src_name.display_name.len = GSSX_max_princ_sz }; struct gssx_res_accept_sec_context res = { @@ -275,6 +305,7 @@ int gssp_accept_sec_context_upcall(struct net *net, .rpc_cred = NULL, /* FIXME ? */ }; struct xdr_netobj client_name = { 0 , NULL }; + struct xdr_netobj target_name = { 0, NULL }; int ret; if (data->in_handle.len != 0) @@ -285,8 +316,6 @@ int gssp_accept_sec_context_upcall(struct net *net, if (ret) return ret; - /* use nfs/ for targ_name ? */ - ret = gssp_call(net, &msg); gssp_free_receive_pages(&arg); @@ -304,6 +333,7 @@ int gssp_accept_sec_context_upcall(struct net *net, kfree(rctxh.mech.data); } client_name = rctxh.src_name.display_name; + target_name = rctxh.targ_name.display_name; } if (res.options.count == 1) { @@ -325,32 +355,22 @@ int gssp_accept_sec_context_upcall(struct net *net, } /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */ - if (data->found_creds && client_name.data != NULL) { - char *c; - - data->creds.cr_raw_principal = kstrndup(client_name.data, - client_name.len, GFP_KERNEL); - - data->creds.cr_principal = kstrndup(client_name.data, - client_name.len, GFP_KERNEL); - if (data->creds.cr_principal) { - /* terminate and remove realm part */ - c = strchr(data->creds.cr_principal, '@'); - if (c) { - *c = '\0'; - - /* change service-hostname delimiter */ - c = strchr(data->creds.cr_principal, '/'); - if (c) *c = '@'; - } - if (!c) { - /* not a service principal */ - kfree(data->creds.cr_principal); - data->creds.cr_principal = NULL; - } + if (data->found_creds) { + if (client_name.data) { + data->creds.cr_raw_principal = + gssp_stringify(&client_name); + data->creds.cr_principal = + gssp_stringify(&client_name); + gssp_hostbased_service(&data->creds.cr_principal); + } + if (target_name.data) { + data->creds.cr_targ_princ = + gssp_stringify(&target_name); + gssp_hostbased_service(&data->creds.cr_targ_princ); } } kfree(client_name.data); + kfree(target_name.data); return ret; } -- cgit v1.2.3 From cb25e7b293ec5cd82bc958a6c89dadeb198504b4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Aug 2018 12:06:04 -0400 Subject: nfsd: Use correct credential for NFSv4.0 callback with GSS I've had trouble when operating a multi-homed Linux NFS server with Kerberos using NFSv4.0. Lately, I've seen my clients reporting this (and then hanging): May 9 11:43:26 manet kernel: NFS: NFSv4 callback contains invalid cred The client-side commit f11b2a1cfbf5 ("nfs4: copy acceptor name from context to nfs_client") appears to be related, but I suspect this problem has been going on for some time before that. RFC 7530 Section 3.3.3 says: > For Kerberos V5, nfs/hostname would be a server principal in the > Kerberos Key Distribution Center database. This is the same > principal the client acquired a GSS-API context for when it issued > the SETCLIENTID operation ... In other words, an NFSv4.0 client expects that the server will use the same GSS principal for callback that the client used to establish its lease. For example, if the client used the service principal "nfs@server.domain" to establish its lease, the server is required to use "nfs@server.domain" when performing NFSv4.0 callback operations. The Linux NFS server currently does not. It uses a common service principal for all callback connections. Sometimes this works as expected, and other times -- for example, when the server is accessible via multiple hostnames -- it won't work at all. This patch scrapes the target name from the client credential, and uses that for the NFSv4.0 callback credential. That should be correct much more often. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 144be2dc18d7..ba8e902ec4ad 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -769,7 +769,14 @@ void cleanup_callback_cred(void) static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { - return get_rpccred(callback_cred); + char *principal = clp->cl_cred.cr_targ_princ ? + clp->cl_cred.cr_targ_princ : "nfs"; + struct rpc_cred *cred; + + cred = rpc_lookup_machine_cred(principal); + if (!IS_ERR(cred)) + get_rpccred(cred); + return cred; } else { struct rpc_auth *auth = client->cl_auth; struct auth_cred acred = {}; -- cgit v1.2.3 From a26dd64f5477968d730cf92868b4092314b8e45e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Aug 2018 12:06:09 -0400 Subject: nfsd: Remove callback_cred Clean up: The global callback_cred is no longer used, so it can be removed. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 20 -------------------- fs/nfsd/nfs4state.c | 10 ++-------- fs/nfsd/state.h | 2 -- 3 files changed, 2 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ba8e902ec4ad..601bf33c26a0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -746,26 +746,6 @@ static int max_cb_time(struct net *net) return max(nn->nfsd4_lease/10, (time_t)1) * HZ; } -static struct rpc_cred *callback_cred; - -int set_callback_cred(void) -{ - if (callback_cred) - return 0; - callback_cred = rpc_lookup_machine_cred("nfs"); - if (!callback_cred) - return -ENOMEM; - return 0; -} - -void cleanup_callback_cred(void) -{ - if (callback_cred) { - put_rpccred(callback_cred); - callback_cred = NULL; - } -} - static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3322fe35a8fe..b0ca0efd2875 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7231,14 +7231,10 @@ nfs4_state_start(void) { int ret; - ret = set_callback_cred(); - if (ret) - return ret; - laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; - goto out_cleanup_cred; + goto out; } ret = nfsd4_create_callback_queue(); if (ret) @@ -7249,8 +7245,7 @@ nfs4_state_start(void) out_free_laundry: destroy_workqueue(laundry_wq); -out_cleanup_cred: - cleanup_callback_cred(); +out: return ret; } @@ -7287,7 +7282,6 @@ nfs4_state_shutdown(void) { destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); - cleanup_callback_cred(); } static void diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f3772ea8ba0d..0b15dac7e609 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -617,8 +617,6 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); -extern int set_callback_cred(void); -extern void cleanup_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); -- cgit v1.2.3