diff options
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- | fs/nfsd/Kconfig | 28 | ||||
-rw-r--r-- | fs/nfsd/Makefile | 4 | ||||
-rw-r--r-- | fs/nfsd/blocklayout.c | 298 | ||||
-rw-r--r-- | fs/nfsd/blocklayoutxdr.c | 77 | ||||
-rw-r--r-- | fs/nfsd/blocklayoutxdr.h | 14 | ||||
-rw-r--r-- | fs/nfsd/lockd.c | 2 | ||||
-rw-r--r-- | fs/nfsd/netns.h | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs3proc.c | 7 | ||||
-rw-r--r-- | fs/nfsd/nfs3xdr.c | 6 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 6 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 104 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 80 | ||||
-rw-r--r-- | fs/nfsd/nfs4recover.c | 47 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 364 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 55 | ||||
-rw-r--r-- | fs/nfsd/nfscache.c | 32 | ||||
-rw-r--r-- | fs/nfsd/nfsfh.c | 5 | ||||
-rw-r--r-- | fs/nfsd/nfsfh.h | 47 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 75 | ||||
-rw-r--r-- | fs/nfsd/pnfs.h | 8 | ||||
-rw-r--r-- | fs/nfsd/state.h | 51 | ||||
-rw-r--r-- | fs/nfsd/trace.c | 2 | ||||
-rw-r--r-- | fs/nfsd/trace.h | 43 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 58 | ||||
-rw-r--r-- | fs/nfsd/vfs.h | 25 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 12 |
26 files changed, 1119 insertions, 333 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index a0b77fc1bd39..c9f583d7bac8 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -84,12 +84,30 @@ config NFSD_V4 If unsure, say N. config NFSD_PNFS - bool "NFSv4.1 server support for Parallel NFS (pNFS)" - depends on NFSD_V4 + bool + +config NFSD_BLOCKLAYOUT + bool "NFSv4.1 server support for pNFS block layouts" + depends on NFSD_V4 && BLOCK + select NFSD_PNFS + help + This option enables support for the exporting pNFS block layouts + in the kernel's NFS server. The pNFS block layout enables NFS + clients to directly perform I/O to block devices accesible to both + the server and the clients. See RFC 5663 for more details. + + If unsure, say N. + +config NFSD_SCSILAYOUT + bool "NFSv4.1 server support for pNFS SCSI layouts" + depends on NFSD_V4 && BLOCK + select NFSD_PNFS help - This option enables support for the parallel NFS features of the - minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS - server. + This option enables support for the exporting pNFS SCSI layouts + in the kernel's NFS server. The pNFS SCSI layout enables NFS + clients to directly perform I/O to SCSI devices accesible to both + the server and the clients. See draft-ietf-nfsv4-scsi-layout for + more details. If unsure, say N. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9a6028e120c6..3ae5f3c77e28 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o -nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o +nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o +nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o +nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index c29d9421bd5e..e55b5242614d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -1,11 +1,14 @@ /* - * Copyright (c) 2014 Christoph Hellwig. + * Copyright (c) 2014-2016 Christoph Hellwig. */ #include <linux/exportfs.h> #include <linux/genhd.h> #include <linux/slab.h> +#include <linux/pr.h> #include <linux/nfsd/debug.h> +#include <scsi/scsi_proto.h> +#include <scsi/scsi_common.h> #include "blocklayoutxdr.h" #include "pnfs.h" @@ -13,37 +16,6 @@ #define NFSDDBG_FACILITY NFSDDBG_PNFS -static int -nfsd4_block_get_device_info_simple(struct super_block *sb, - struct nfsd4_getdeviceinfo *gdp) -{ - struct pnfs_block_deviceaddr *dev; - struct pnfs_block_volume *b; - - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + - sizeof(struct pnfs_block_volume), GFP_KERNEL); - if (!dev) - return -ENOMEM; - gdp->gd_device = dev; - - dev->nr_volumes = 1; - b = &dev->volumes[0]; - - b->type = PNFS_BLOCK_VOLUME_SIMPLE; - b->simple.sig_len = PNFS_BLOCK_UUID_LEN; - return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, - &b->simple.offset); -} - -static __be32 -nfsd4_block_proc_getdeviceinfo(struct super_block *sb, - struct nfsd4_getdeviceinfo *gdp) -{ - if (sb->s_bdev != sb->s_bdev->bd_contains) - return nfserr_inval; - return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); -} - static __be32 nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, struct nfsd4_layoutget *args) @@ -141,20 +113,13 @@ out_layoutunavailable: } static __be32 -nfsd4_block_proc_layoutcommit(struct inode *inode, - struct nfsd4_layoutcommit *lcp) +nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, + struct iomap *iomaps, int nr_iomaps) { loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; - struct iomap *iomaps; - int nr_iomaps; int error; - nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); - if (nr_iomaps < 0) - return nfserrno(nr_iomaps); - if (lcp->lc_mtime.tv_nsec == UTIME_NOW || timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) lcp->lc_mtime = current_fs_time(inode->i_sb); @@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, return nfserrno(error); } +#ifdef CONFIG_NFSD_BLOCKLAYOUT +static int +nfsd4_block_get_device_info_simple(struct super_block *sb, + struct nfsd4_getdeviceinfo *gdp) +{ + struct pnfs_block_deviceaddr *dev; + struct pnfs_block_volume *b; + + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + + sizeof(struct pnfs_block_volume), GFP_KERNEL); + if (!dev) + return -ENOMEM; + gdp->gd_device = dev; + + dev->nr_volumes = 1; + b = &dev->volumes[0]; + + b->type = PNFS_BLOCK_VOLUME_SIMPLE; + b->simple.sig_len = PNFS_BLOCK_UUID_LEN; + return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, + &b->simple.offset); +} + +static __be32 +nfsd4_block_proc_getdeviceinfo(struct super_block *sb, + struct nfs4_client *clp, + struct nfsd4_getdeviceinfo *gdp) +{ + if (sb->s_bdev != sb->s_bdev->bd_contains) + return nfserr_inval; + return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); +} + +static __be32 +nfsd4_block_proc_layoutcommit(struct inode *inode, + struct nfsd4_layoutcommit *lcp) +{ + struct iomap *iomaps; + int nr_iomaps; + + nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, + lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + if (nr_iomaps < 0) + return nfserrno(nr_iomaps); + + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); +} + const struct nfsd4_layout_ops bl_layout_ops = { /* * Pretend that we send notification to the client. This is a blatant @@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = { .encode_layoutget = nfsd4_block_encode_layoutget, .proc_layoutcommit = nfsd4_block_proc_layoutcommit, }; +#endif /* CONFIG_NFSD_BLOCKLAYOUT */ + +#ifdef CONFIG_NFSD_SCSILAYOUT +static int nfsd4_scsi_identify_device(struct block_device *bdev, + struct pnfs_block_volume *b) +{ + struct request_queue *q = bdev->bd_disk->queue; + struct request *rq; + size_t bufflen = 252, len, id_len; + u8 *buf, *d, type, assoc; + int error; + + buf = kzalloc(bufflen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rq = blk_get_request(q, READ, GFP_KERNEL); + if (IS_ERR(rq)) { + error = -ENOMEM; + goto out_free_buf; + } + blk_rq_set_block_pc(rq); + + error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); + if (error) + goto out_put_request; + + rq->cmd[0] = INQUIRY; + rq->cmd[1] = 1; + rq->cmd[2] = 0x83; + rq->cmd[3] = bufflen >> 8; + rq->cmd[4] = bufflen & 0xff; + rq->cmd_len = COMMAND_SIZE(INQUIRY); + + error = blk_execute_rq(rq->q, NULL, rq, 1); + if (error) { + pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", + rq->errors); + goto out_put_request; + } + + len = (buf[2] << 8) + buf[3] + 4; + if (len > bufflen) { + pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", + len); + goto out_put_request; + } + + d = buf + 4; + for (d = buf + 4; d < buf + len; d += id_len + 4) { + id_len = d[3]; + type = d[1] & 0xf; + assoc = (d[1] >> 4) & 0x3; + + /* + * We only care about a EUI-64 and NAA designator types + * with LU association. + */ + if (assoc != 0x00) + continue; + if (type != 0x02 && type != 0x03) + continue; + if (id_len != 8 && id_len != 12 && id_len != 16) + continue; + + b->scsi.code_set = PS_CODE_SET_BINARY; + b->scsi.designator_type = type == 0x02 ? + PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA; + b->scsi.designator_len = id_len; + memcpy(b->scsi.designator, d + 4, id_len); + + /* + * If we found a 8 or 12 byte descriptor continue on to + * see if a 16 byte one is available. If we find a + * 16 byte descriptor we're done. + */ + if (id_len == 16) + break; + } + +out_put_request: + blk_put_request(rq); +out_free_buf: + kfree(buf); + return error; +} + +#define NFSD_MDS_PR_KEY 0x0100000000000000 + +/* + * We use the client ID as a unique key for the reservations. + * This allows us to easily fence a client when recalls fail. + */ +static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) +{ + return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; +} + +static int +nfsd4_block_get_device_info_scsi(struct super_block *sb, + struct nfs4_client *clp, + struct nfsd4_getdeviceinfo *gdp) +{ + struct pnfs_block_deviceaddr *dev; + struct pnfs_block_volume *b; + const struct pr_ops *ops; + int error; + + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + + sizeof(struct pnfs_block_volume), GFP_KERNEL); + if (!dev) + return -ENOMEM; + gdp->gd_device = dev; + + dev->nr_volumes = 1; + b = &dev->volumes[0]; + + b->type = PNFS_BLOCK_VOLUME_SCSI; + b->scsi.pr_key = nfsd4_scsi_pr_key(clp); + + error = nfsd4_scsi_identify_device(sb->s_bdev, b); + if (error) + return error; + + ops = sb->s_bdev->bd_disk->fops->pr_ops; + if (!ops) { + pr_err("pNFS: device %s does not support PRs.\n", + sb->s_id); + return -EINVAL; + } + + error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); + if (error) { + pr_err("pNFS: failed to register key for device %s.\n", + sb->s_id); + return -EINVAL; + } + + error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, + PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); + if (error) { + pr_err("pNFS: failed to reserve device %s.\n", + sb->s_id); + return -EINVAL; + } + + return 0; +} + +static __be32 +nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, + struct nfs4_client *clp, + struct nfsd4_getdeviceinfo *gdp) +{ + if (sb->s_bdev != sb->s_bdev->bd_contains) + return nfserr_inval; + return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); +} +static __be32 +nfsd4_scsi_proc_layoutcommit(struct inode *inode, + struct nfsd4_layoutcommit *lcp) +{ + struct iomap *iomaps; + int nr_iomaps; + + nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, + lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + if (nr_iomaps < 0) + return nfserrno(nr_iomaps); + + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); +} + +static void +nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) +{ + struct nfs4_client *clp = ls->ls_stid.sc_client; + struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev; + + bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, + nfsd4_scsi_pr_key(clp), 0, true); +} + +const struct nfsd4_layout_ops scsi_layout_ops = { + /* + * Pretend that we send notification to the client. This is a blatant + * lie to force recent Linux clients to cache our device IDs. + * We rarely ever change the device ID, so the harm of leaking deviceids + * for a while isn't too bad. Unfortunately RFC5661 is a complete mess + * in this regard, but I filed errata 4119 for this a while ago, and + * hopefully the Linux client will eventually start caching deviceids + * without this again. + */ + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, + .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo, + .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, + .proc_layoutget = nfsd4_block_proc_layoutget, + .encode_layoutget = nfsd4_block_encode_layoutget, + .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit, + .fence_client = nfsd4_scsi_fence_client, +}; +#endif /* CONFIG_NFSD_SCSILAYOUT */ diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 6d834dc9bbc8..6c3b316f932e 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Christoph Hellwig. + * Copyright (c) 2014-2016 Christoph Hellwig. */ #include <linux/sunrpc/svc.h> #include <linux/exportfs.h> @@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) p = xdr_encode_hyper(p, b->simple.offset); p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); break; + case PNFS_BLOCK_VOLUME_SCSI: + len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8; + p = xdr_reserve_space(xdr, len); + if (!p) + return -ETOOSMALL; + + *p++ = cpu_to_be32(b->type); + *p++ = cpu_to_be32(b->scsi.code_set); + *p++ = cpu_to_be32(b->scsi.designator_type); + p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len); + p = xdr_encode_hyper(p, b->scsi.pr_key); + break; default: return -ENOTSUPP; } @@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, u32 block_size) { struct iomap *iomaps; - u32 nr_iomaps, expected, i; + u32 nr_iomaps, i; if (len < sizeof(u32)) { dprintk("%s: extent array too small: %u\n", __func__, len); return -EINVAL; } + len -= sizeof(u32); + if (len % PNFS_BLOCK_EXTENT_SIZE) { + dprintk("%s: extent array invalid: %u\n", __func__, len); + return -EINVAL; + } nr_iomaps = be32_to_cpup(p++); - expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE; - if (len != expected) { + if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) { dprintk("%s: extent array size mismatch: %u/%u\n", - __func__, len, expected); + __func__, len, nr_iomaps); return -EINVAL; } @@ -155,3 +171,54 @@ fail: kfree(iomaps); return -EINVAL; } + +int +nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, + u32 block_size) +{ + struct iomap *iomaps; + u32 nr_iomaps, expected, i; + + if (len < sizeof(u32)) { + dprintk("%s: extent array too small: %u\n", __func__, len); + return -EINVAL; + } + + nr_iomaps = be32_to_cpup(p++); + expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; + if (len != expected) { + dprintk("%s: extent array size mismatch: %u/%u\n", + __func__, len, expected); + return -EINVAL; + } + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); + if (!iomaps) { + dprintk("%s: failed to allocate extent array\n", __func__); + return -ENOMEM; + } + + for (i = 0; i < nr_iomaps; i++) { + u64 val; + + p = xdr_decode_hyper(p, &val); + if (val & (block_size - 1)) { + dprintk("%s: unaligned offset 0x%llx\n", __func__, val); + goto fail; + } + iomaps[i].offset = val; + + p = xdr_decode_hyper(p, &val); + if (val & (block_size - 1)) { + dprintk("%s: unaligned length 0x%llx\n", __func__, val); + goto fail; + } + iomaps[i].length = val; + } + + *iomapp = iomaps; + return nr_iomaps; +fail: + kfree(iomaps); + return -EINVAL; +} diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index 6de925fe8499..397bc7563a49 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h @@ -15,6 +15,11 @@ struct pnfs_block_extent { enum pnfs_block_extent_state es; }; +struct pnfs_block_range { + u64 foff; + u64 len; +}; + /* * Random upper cap for the uuid length to avoid unbounded allocation. * Not actually limited by the protocol. @@ -29,6 +34,13 @@ struct pnfs_block_volume { u32 sig_len; u8 sig[PNFS_BLOCK_UUID_LEN]; } simple; + struct { + enum scsi_code_set code_set; + enum scsi_designator_type designator_type; + int designator_len; + u8 designator[256]; + u64 pr_key; + } scsi; }; }; @@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, struct nfsd4_layoutget *lgp); int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, u32 block_size); +int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, + u32 block_size); #endif /* _NFSD_BLOCKLAYOUTXDR_H */ diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 77e7a5cca888..1a03bc3059e8 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -58,7 +58,7 @@ nlm_fclose(struct file *filp) fput(filp); } -static struct nlmsvc_binding nfsd_nlm_ops = { +static const struct nlmsvc_binding nfsd_nlm_ops = { .fopen = nlm_fopen, /* open file for locking */ .fclose = nlm_fclose, /* close file */ }; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index d8b16c2568f3..5fbf3bbd00d0 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -92,7 +92,7 @@ struct nfsd_net { struct file *rec_file; bool in_grace; - struct nfsd4_client_tracking_ops *client_tracking_ops; + const struct nfsd4_client_tracking_ops *client_tracking_ops; time_t nfsd4_lease; time_t nfsd4_grace; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7b755b7f785c..51c3b06e8036 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, { __be32 nfserr; u32 max_blocksize = svc_max_payload(rqstp); + unsigned long cnt = min(argp->count, max_blocksize); dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), @@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - resp->count = min(argp->count, max_blocksize); + resp->count = cnt; svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); @@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, &resp->count); if (nfserr == 0) { struct inode *inode = d_inode(resp->fh.fh_dentry); - - resp->eof = (argp->offset + resp->count) >= inode->i_size; + resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, + inode->i_size); } RETURN_STATUS(nfserr); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f6e7cbabac5a..2246454dec76 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp) err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version; if (err) { - fhp->fh_post_saved = 0; + fhp->fh_post_saved = false; /* Grab the ctime anyway - set_change_info might use it */ fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; } else - fhp->fh_post_saved = 1; + fhp->fh_post_saved = true; } /* @@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, } else dchild = dget(dparent); } else - dchild = lookup_one_len(name, dparent, namlen); + dchild = lookup_one_len_unlocked(name, dparent, namlen); if (IS_ERR(dchild)) return rv; if (d_mountpoint(dchild)) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e7f50c4081d6..7389cb1d7409 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -792,12 +792,16 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) { + if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) + return; clp->cl_cb_state = NFSD4_CB_DOWN; warn_no_callback_path(clp, reason); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) { + if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) + return; clp->cl_cb_state = NFSD4_CB_FAULT; warn_no_callback_path(clp, reason); } @@ -1143,7 +1147,7 @@ nfsd4_run_cb_work(struct work_struct *work) } void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, - struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) + const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) { cb->cb_clp = clp; cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ebf90e487c75..825c7bc8d789 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2014 Christoph Hellwig. */ +#include <linux/blkdev.h> #include <linux/kmod.h> #include <linux/file.h> #include <linux/jhash.h> @@ -22,11 +23,16 @@ struct nfs4_layout { static struct kmem_cache *nfs4_layout_cache; static struct kmem_cache *nfs4_layout_stateid_cache; -static struct nfsd4_callback_ops nfsd4_cb_layout_ops; +static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; static const struct lock_manager_operations nfsd4_layouts_lm_ops; const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { +#ifdef CONFIG_NFSD_BLOCKLAYOUT [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, +#endif +#ifdef CONFIG_NFSD_SCSILAYOUT + [LAYOUT_SCSI] = &scsi_layout_ops, +#endif }; /* pNFS device ID to export fsid mapping */ @@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp) if (!(exp->ex_flags & NFSEXP_PNFS)) return; + /* + * Check if the file system supports exporting a block-like layout. + * If the block device supports reservations prefer the SCSI layout, + * otherwise advertise the block layout. + */ +#ifdef CONFIG_NFSD_BLOCKLAYOUT if (sb->s_export_op->get_uuid && sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks) exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; +#endif +#ifdef CONFIG_NFSD_SCSILAYOUT + /* overwrite block layout selection if needed */ + if (sb->s_export_op->map_blocks && + sb->s_export_op->commit_blocks && + sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) + exp->ex_layout_type = LAYOUT_SCSI; +#endif } static void @@ -201,6 +221,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, INIT_LIST_HEAD(&ls->ls_perfile); spin_lock_init(&ls->ls_lock); INIT_LIST_HEAD(&ls->ls_layouts); + mutex_init(&ls->ls_mutex); ls->ls_layout_type = layout_type; nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, NFSPROC4_CLNT_CB_LAYOUT); @@ -262,19 +283,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, status = nfserr_jukebox; if (!ls) goto out; + mutex_lock(&ls->ls_mutex); } else { ls = container_of(stid, struct nfs4_layout_stateid, ls_stid); status = nfserr_bad_stateid; + mutex_lock(&ls->ls_mutex); if (stateid->si_generation > stid->sc_stateid.si_generation) - goto out_put_stid; + goto out_unlock_stid; if (layout_type != ls->ls_layout_type) - goto out_put_stid; + goto out_unlock_stid; } *lsp = ls; return 0; +out_unlock_stid: + mutex_unlock(&ls->ls_mutex); out_put_stid: nfs4_put_stid(stid); out: @@ -296,8 +321,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) trace_layout_recall(&ls->ls_stid.sc_stateid); atomic_inc(&ls->ls_stid.sc_count); - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); nfsd4_run_cb(&ls->ls_recall); out_unlock: @@ -406,8 +429,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) list_add_tail(&new->lo_perstate, &ls->ls_layouts); new = NULL; done: - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid); spin_unlock(&ls->ls_lock); out: spin_unlock(&fp->fi_lock); @@ -481,11 +503,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } } if (!list_empty(&ls->ls_layouts)) { - if (found) { - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid, - sizeof(stateid_t)); - } + if (found) + nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); lrp->lrs_present = 1; } else { trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); @@ -494,6 +513,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } spin_unlock(&ls->ls_lock); + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); nfsd4_free_layouts(&reaplist); return nfs_ok; @@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); - trace_layout_recall_fail(&ls->ls_stid.sc_stateid); - printk(KERN_WARNING "nfsd: client %s failed to respond to layout recall. " " Fencing..\n", addr_str); @@ -608,34 +626,67 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) } } +static void +nfsd4_cb_layout_prepare(struct nfsd4_callback *cb) +{ + struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + + mutex_lock(&ls->ls_mutex); + nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid); + mutex_unlock(&ls->ls_mutex); +} + static int nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_layout_stateid *ls = container_of(cb, struct nfs4_layout_stateid, ls_recall); + struct nfsd_net *nn; + ktime_t now, cutoff; + const struct nfsd4_layout_ops *ops; LIST_HEAD(reaplist); + switch (task->tk_status) { case 0: - return 1; + case -NFS4ERR_DELAY: + /* + * Anything left? If not, then call it done. Note that we don't + * take the spinlock since this is an optimization and nothing + * should get added until the cb counter goes to zero. + */ + if (list_empty(&ls->ls_layouts)) + return 1; + + /* Poll the client until it's done with the layout */ + now = ktime_get(); + nn = net_generic(ls->ls_stid.sc_client->net, nfsd_net_id); + + /* Client gets 2 lease periods to return it */ + cutoff = ktime_add_ns(task->tk_start, + nn->nfsd4_lease * NSEC_PER_SEC * 2); + + if (ktime_before(now, cutoff)) { + rpc_delay(task, HZ/100); /* 10 mili-seconds */ + return 0; + } + /* Fallthrough */ case -NFS4ERR_NOMATCHING_LAYOUT: trace_layout_recall_done(&ls->ls_stid.sc_stateid); task->tk_status = 0; return 1; - case -NFS4ERR_DELAY: - /* Poll the client until it's done with the layout */ - /* FIXME: cap number of retries. - * The pnfs standard states that we need to only expire - * the client after at-least "lease time" .eg lease-time * 2 - * when failing to communicate a recall - */ - rpc_delay(task, HZ/100); /* 10 mili-seconds */ - return 0; default: /* * Unknown error or non-responding client, we'll need to fence. */ - nfsd4_cb_layout_fail(ls); + trace_layout_recall_fail(&ls->ls_stid.sc_stateid); + + ops = nfsd4_layout_ops[ls->ls_layout_type]; + if (ops->fence_client) + ops->fence_client(ls); + else + nfsd4_cb_layout_fail(ls); return -1; } } @@ -654,7 +705,8 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) nfs4_put_stid(&ls->ls_stid); } -static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { +static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = { + .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4ce6b97b31ad..de1ff1d98bb1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u struct inode *inode = d_inode(resfh->fh_dentry); int status; - mutex_lock(&inode->i_mutex); + inode_lock(inode); status = security_inode_setsecctx(resfh->fh_dentry, label->data, label->len); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); if (status) /* @@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ - status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid, - RD_STATE, &read->rd_filp, &read->rd_tmp_file); + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &read->rd_stateid, RD_STATE, + &read->rd_filp, &read->rd_tmp_file); if (status) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; @@ -863,12 +864,10 @@ static __be32 nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_secinfo *secinfo) { - struct svc_fh resfh; struct svc_export *exp; struct dentry *dentry; __be32 err; - fh_init(&resfh, NFS4_FHSIZE); err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); if (err) return err; @@ -877,6 +876,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; + fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; @@ -921,7 +921,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, - &setattr->sa_stateid, WR_STATE, NULL, NULL); + &cstate->current_fh, &setattr->sa_stateid, + WR_STATE, NULL, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -985,8 +986,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE, - &filp, NULL); + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + stateid, WR_STATE, &filp, NULL); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; @@ -1010,13 +1011,54 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } static __be32 +nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_clone *clone) +{ + struct file *src, *dst; + __be32 status; + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, + &clone->cl_src_stateid, RD_STATE, + &src, NULL); + if (status) { + dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); + goto out; + } + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &clone->cl_dst_stateid, WR_STATE, + &dst, NULL); + if (status) { + dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); + goto out_put_src; + } + + /* fix up for NFS-specific error code */ + if (!S_ISREG(file_inode(src)->i_mode) || + !S_ISREG(file_inode(dst)->i_mode)) { + status = nfserr_wrong_type; + goto out_put_dst; + } + + status = nfsd4_clone_file_range(src, clone->cl_src_pos, + dst, clone->cl_dst_pos, clone->cl_count); + +out_put_dst: + fput(dst); +out_put_src: + fput(src); +out: + return status; +} + +static __be32 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_fallocate *fallocate, int flags) { __be32 status = nfserr_notsupp; struct file *file; - status = nfs4_preprocess_stateid_op(rqstp, cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &file, NULL); if (status != nfs_ok) { @@ -1055,7 +1097,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct file *file; - status = nfs4_preprocess_stateid_op(rqstp, cstate, + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &file, NULL); if (status) { @@ -1226,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, goto out; nfserr = nfs_ok; - if (gdp->gd_maxcount != 0) - nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); + if (gdp->gd_maxcount != 0) { + nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, + cstate->session->se_client, gdp); + } gdp->gd_notify_types &= ops->notify_types; out: @@ -1309,6 +1353,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, nfserr = nfsd4_insert_layout(lgp, ls); out_put_stid: + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); out: return nfserr; @@ -1362,6 +1407,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, goto out; } + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + if (new_size > i_size_read(inode)) { lcp->lc_size_chg = 1; lcp->lc_newsize = new_size; @@ -2275,6 +2323,12 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_DEALLOCATE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, + [OP_CLONE] = { + .op_func = (nfsd4op_func)nfsd4_clone, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_CLONE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_SEEK] = { .op_func = (nfsd4op_func)nfsd4_seek, .op_name = "OP_SEEK", diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e3d47091b191..66eaeb1e8c2c 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -32,10 +32,10 @@ * */ +#include <crypto/hash.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/namei.h> -#include <linux/crypto.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/module.h> @@ -104,29 +104,35 @@ static int nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) { struct xdr_netobj cksum; - struct hash_desc desc; - struct scatterlist sg; + struct crypto_shash *tfm; int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) { - status = PTR_ERR(desc.tfm); + tfm = crypto_alloc_shash("md5", 0, 0); + if (IS_ERR(tfm)) { + status = PTR_ERR(tfm); goto out_no_tfm; } - cksum.len = crypto_hash_digestsize(desc.tfm); + cksum.len = crypto_shash_digestsize(tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) { status = -ENOMEM; goto out; } - sg_init_one(&sg, clname->data, clname->len); + { + SHASH_DESC_ON_STACK(desc, tfm); + + desc->tfm = tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + status = crypto_shash_digest(desc, clname->data, clname->len, + cksum.data); + shash_desc_zero(desc); + } - status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data); if (status) goto out; @@ -135,7 +141,7 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) status = 0; out: kfree(cksum.data); - crypto_free_hash(desc.tfm); + crypto_free_shash(tfm); out_no_tfm: return status; } @@ -192,7 +198,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dir = nn->rec_file->f_path.dentry; /* lock the parent */ - mutex_lock(&d_inode(dir)->i_mutex); + inode_lock(d_inode(dir)); dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); if (IS_ERR(dentry)) { @@ -213,7 +219,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) out_put: dput(dentry); out_unlock: - mutex_unlock(&d_inode(dir)->i_mutex); + inode_unlock(d_inode(dir)); if (status == 0) { if (nn->in_grace) { crp = nfs4_client_to_reclaim(dname, nn); @@ -286,7 +292,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) } status = iterate_dir(nn->rec_file, &ctx.ctx); - mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); list_for_each_entry_safe(entry, tmp, &ctx.names, list) { if (!status) { @@ -302,7 +308,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) list_del(&entry->list); kfree(entry); } - mutex_unlock(&d_inode(dir)->i_mutex); + inode_unlock(d_inode(dir)); nfs4_reset_creds(original_cred); list_for_each_entry_safe(entry, tmp, &ctx.names, list) { @@ -322,7 +328,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); dir = nn->rec_file->f_path.dentry; - mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); dentry = lookup_one_len(name, dir, namlen); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); @@ -335,7 +341,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) out: dput(dentry); out_unlock: - mutex_unlock(&d_inode(dir)->i_mutex); + inode_unlock(d_inode(dir)); return status; } @@ -631,7 +637,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) return -ENOENT; } -static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { +static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { .init = nfsd4_legacy_tracking_init, .exit = nfsd4_legacy_tracking_exit, .create = nfsd4_create_clid_dir, @@ -1050,7 +1056,7 @@ out_err: printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); } -static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { +static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { .init = nfsd4_init_cld_pipe, .exit = nfsd4_remove_cld_pipe, .create = nfsd4_cld_create, @@ -1260,6 +1266,7 @@ nfsd4_umh_cltrack_init(struct net *net) /* XXX: The usermode helper s not working in container yet. */ if (net != &init_net) { pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n"); + kfree(grace_start); return -EINVAL; } @@ -1394,7 +1401,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) kfree(legacy); } -static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { +static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { .init = nfsd4_umh_cltrack_init, .exit = NULL, .create = nfsd4_umh_cltrack_create, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f1d5691b795..0462eeddfff9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -98,7 +98,7 @@ static struct kmem_cache *odstate_slab; static void free_session(struct nfsd4_session *); -static struct nfsd4_callback_ops nfsd4_cb_recall_ops; +static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static bool is_session_dead(struct nfsd4_session *ses) { @@ -575,6 +575,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ atomic_set(&stid->sc_count, 1); + spin_lock_init(&stid->sc_lock); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -745,6 +746,18 @@ nfs4_put_stid(struct nfs4_stid *s) put_nfs4_file(fp); } +void +nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) +{ + stateid_t *src = &stid->sc_stateid; + + spin_lock(&stid->sc_lock); + if (unlikely(++src->si_generation == 0)) + src->si_generation = 1; + memcpy(dst, src, sizeof(*dst)); + spin_unlock(&stid->sc_lock); +} + static void nfs4_put_deleg_lease(struct nfs4_file *fp) { struct file *filp = NULL; @@ -765,16 +778,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -1792,15 +1857,28 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) target->cl_clientid.cl_id = source->cl_clientid.cl_id; } -static int copy_cred(struct svc_cred *target, struct svc_cred *source) +int strdup_if_nonnull(char **target, char *source) { - if (source->cr_principal) { - target->cr_principal = - kstrdup(source->cr_principal, GFP_KERNEL); - if (target->cr_principal == NULL) + if (source) { + *target = kstrdup(source, GFP_KERNEL); + if (!*target) return -ENOMEM; } else - target->cr_principal = NULL; + *target = NULL; + return 0; +} + +static int copy_cred(struct svc_cred *target, struct svc_cred *source) +{ + int ret; + + ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal); + if (ret) + return ret; + ret = strdup_if_nonnull(&target->cr_raw_principal, + source->cr_raw_principal); + if (ret) + return ret; target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; @@ -1904,6 +1982,9 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) return false; if (!svc_rqst_integrity_protected(rqstp)) return false; + if (cl->cl_cred.cr_raw_principal) + return 0 == strcmp(cl->cl_cred.cr_raw_principal, + cr->cr_raw_principal); if (!cr->cr_principal) return false; return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); @@ -2175,7 +2256,8 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) base = resp->cstate.data_offset; slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) - WARN("%s: sessions DRC could not cache compound\n", __func__); + WARN(1, "%s: sessions DRC could not cache compound\n", + __func__); return; } @@ -2256,15 +2338,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) clid->flags = new->cl_exchange_flags; } +static bool client_has_openowners(struct nfs4_client *clp) +{ + struct nfs4_openowner *oo; + + list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) { + if (!list_empty(&oo->oo_owner.so_stateids)) + return true; + } + return false; +} + static bool client_has_state(struct nfs4_client *clp) { - /* - * Note clp->cl_openowners check isn't quite right: there's no - * need to count owners without stateid's. - * - * Also note we should probably be using this in 4.0 case too. - */ - return !list_empty(&clp->cl_openowners) + return client_has_openowners(clp) #ifdef CONFIG_NFSD_PNFS || !list_empty(&clp->cl_lo_states) #endif @@ -2295,22 +2382,36 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; + new = create_client(exid->clname, rqstp, &verf); + if (new == NULL) + return nfserr_jukebox; + switch (exid->spa_how) { case SP4_MACH_CRED: - if (!svc_rqst_integrity_protected(rqstp)) - return nfserr_inval; + if (!svc_rqst_integrity_protected(rqstp)) { + status = nfserr_inval; + goto out_nolock; + } + /* + * Sometimes userspace doesn't give us a principal. + * Which is a bug, really. Anyway, we can't enforce + * MACH_CRED in that case, better to give up now: + */ + if (!new->cl_cred.cr_principal && + !new->cl_cred.cr_raw_principal) { + status = nfserr_serverfault; + goto out_nolock; + } + new->cl_mach_cred = true; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: - return nfserr_encr_alg_unsupp; + status = nfserr_encr_alg_unsupp; + goto out_nolock; } - new = create_client(exid->clname, rqstp, &verf); - if (new == NULL) - return nfserr_jukebox; - /* Cases below refer to rfc 5661 section 18.35.4: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); @@ -2372,7 +2473,6 @@ out_new: goto out; } new->cl_minorversion = cstate->minorversion; - new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); @@ -2390,6 +2490,7 @@ out_copy: out: spin_unlock(&nn->client_lock); +out_nolock: if (new) expire_client(new); if (unconf) @@ -2486,21 +2587,26 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs return nfs_ok; } +/* + * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now. + * These are based on similar macros in linux/sunrpc/msg_prot.h . + */ +#define RPC_MAX_HEADER_WITH_AUTH_SYS \ + (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK)) + +#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \ + (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK)) + #define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ - RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) + RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32)) #define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ - RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) + RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \ + sizeof(__be32)) static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) { ca->headerpadsz = 0; - /* - * These RPC_MAX_HEADER macros are overkill, especially since we - * don't even do gss on the backchannel yet. But this is still - * less than 1k. Tighten up this estimate in the unlikely event - * it turns out to be a problem for some client: - */ if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) @@ -2610,10 +2716,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } status = nfs_ok; - /* - * We do not support RDMA or persistent sessions - */ + /* Persistent sessions are not supported */ cr_ses->flags &= ~SESSION4_PERSIST; + /* Upshifting from TCP to RDMA is not supported */ cr_ses->flags &= ~SESSION4_RDMA; init_session(rqstp, new, conf, cr_ses); @@ -3049,7 +3154,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Cases below refer to rfc 3530 section 14.2.33: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); - if (conf) { + if (conf && client_has_state(conf)) { /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) @@ -3136,6 +3241,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } else { /* case 3: normal case; new or rebooted client */ old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { + status = nfserr_clid_inuse; + if (client_has_state(old) + && !same_creds(&unconf->cl_cred, + &old->cl_cred)) + goto out; status = mark_client_expired_locked(old); if (status) { old = NULL; @@ -3317,6 +3427,27 @@ static const struct nfs4_stateowner_operations openowner_ops = { .so_free = nfs4_free_openowner, }; +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *local, *ret = NULL; + struct nfs4_openowner *oo = open->op_openowner; + + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(local, &fp->fi_stateids, st_perfile) { + /* ignore lock owners */ + if (local->st_stateowner->so_is_open_owner == 0) + continue; + if (local->st_stateowner == &oo->oo_owner) { + ret = local; + atomic_inc(&ret->st_stid.sc_count); + break; + } + } + return ret; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3348,9 +3479,20 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, return ret; } -static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { +static struct nfs4_ol_stateid * +init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, + struct nfsd4_open *open) +{ + struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_ol_stateid *retstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&fp->fi_lock); + + retstp = nfsd4_find_existing_open(fp, open); + if (retstp) + goto out_unlock; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3360,12 +3502,14 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - spin_lock(&oo->oo_owner.so_client->cl_lock); + init_rwsem(&stp->st_rwsem); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); + +out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); + return retstp; } /* @@ -3539,7 +3683,7 @@ static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) nfs4_put_stid(&dp->dl_stid); } -static struct nfsd4_callback_ops nfsd4_cb_recall_ops = { +static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { .prepare = nfsd4_cb_recall_prepare, .done = nfsd4_cb_recall_done, .release = nfsd4_cb_recall_release, @@ -3776,27 +3920,6 @@ out: return nfs_ok; } -static struct nfs4_ol_stateid * -nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) -{ - struct nfs4_ol_stateid *local, *ret = NULL; - struct nfs4_openowner *oo = open->op_openowner; - - spin_lock(&fp->fi_lock); - list_for_each_entry(local, &fp->fi_stateids, st_perfile) { - /* ignore lock owners */ - if (local->st_stateowner->so_is_open_owner == 0) - continue; - if (local->st_stateowner == &oo->oo_owner) { - ret = local; - atomic_inc(&ret->st_stid.sc_count); - break; - } - } - spin_unlock(&fp->fi_lock); - return ret; -} - static inline int nfs4_access_to_access(u32 nfs4_access) { int flags = 0; @@ -3945,6 +4068,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. + * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3976,16 +4111,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4005,6 +4143,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4023,9 +4170,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -4160,6 +4305,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; + struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4173,7 +4319,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + spin_lock(&fp->fi_lock); stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4187,15 +4335,32 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; - init_open_stateid(stp, fp, open); + swapstp = init_open_stateid(stp, fp, open); + if (swapstp) { + nfs4_put_stid(&stp->st_stid); + stp = swapstp; + down_read(&stp->st_rwsem); + status = nfs4_upgrade_open(rqstp, fp, current_fh, + stp, open); + if (status) { + up_read(&stp->st_rwsem); + goto out; + } + goto upgrade_out; + } + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4205,8 +4370,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); +upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4410,8 +4576,7 @@ static void laundromat_main(struct work_struct *laundry) { time_t t; - struct delayed_work *dwork = container_of(laundry, struct delayed_work, - work); + struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); @@ -4666,10 +4831,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, */ __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, - struct nfsd4_compound_state *cstate, stateid_t *stateid, - int flags, struct file **filpp, bool *tmp_file) + struct nfsd4_compound_state *cstate, struct svc_fh *fhp, + stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file) { - struct svc_fh *fhp = &cstate->current_fh; struct inode *ino = d_inode(fhp->fh_dentry); struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4819,10 +4983,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4869,6 +5036,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4899,11 +5067,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4975,13 +5145,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_deny(od->od_share_deny, stp); - - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5033,8 +5201,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5260,6 +5428,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5428,6 +5597,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5435,6 +5605,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5512,9 +5684,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); switch (-err) { case 0: /* success! */ - update_stateid(&lock_stp->st_stid.sc_stateid); - memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, - sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; break; case (EAGAIN): /* conflock holds conflicting lock */ @@ -5540,6 +5710,8 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5704,11 +5876,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 51c9e9ca39a4..9df898ba648f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1072,8 +1072,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename READ_BUF(4); rename->rn_snamelen = be32_to_cpup(p++); - READ_BUF(rename->rn_snamelen + 4); + READ_BUF(rename->rn_snamelen); SAVEMEM(rename->rn_sname, rename->rn_snamelen); + READ_BUF(4); rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); @@ -1155,13 +1156,14 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient READ_BUF(8); setclientid->se_callback_prog = be32_to_cpup(p++); setclientid->se_callback_netid_len = be32_to_cpup(p++); - - READ_BUF(setclientid->se_callback_netid_len + 4); + READ_BUF(setclientid->se_callback_netid_len); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); + READ_BUF(4); setclientid->se_callback_addr_len = be32_to_cpup(p++); - READ_BUF(setclientid->se_callback_addr_len + 4); + READ_BUF(setclientid->se_callback_addr_len); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); + READ_BUF(4); setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; @@ -1675,6 +1677,25 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, } static __be32 +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid); + if (status) + return status; + status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid); + if (status) + return status; + + READ_BUF(8 + 8 + 8); + p = xdr_decode_hyper(p, &clone->cl_src_pos); + p = xdr_decode_hyper(p, &clone->cl_dst_pos); + p = xdr_decode_hyper(p, &clone->cl_count); + DECODE_TAIL; +} + +static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { DECODE_HEAD; @@ -1785,6 +1806,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, }; static inline bool @@ -1815,8 +1837,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) READ_BUF(4); argp->taglen = be32_to_cpup(p++); - READ_BUF(argp->taglen + 8); + READ_BUF(argp->taglen); SAVEMEM(argp->tag, argp->taglen); + READ_BUF(8); argp->minorversion = be32_to_cpup(p++); argp->opcnt = be32_to_cpup(p++); max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); @@ -2838,14 +2861,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, __be32 nfserr; int ignore_crossmnt = 0; - dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); + dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); if (d_really_is_negative(dentry)) { /* - * nfsd_buffered_readdir drops the i_mutex between - * readdir and calling this callback, leaving a window - * where this directory entry could have gone away. + * we're not holding the i_mutex here, so there's + * a window where this directory entry could have gone + * away. */ dput(dentry); return nfserr_noent; @@ -3040,7 +3063,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(bcts->dir); - /* Sorry, we do not yet support RDMA over 4.1: */ + /* Upshifting from TCP to RDMA is not supported */ *p++ = cpu_to_be32(0); } return nfserr; @@ -3342,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; u32 eof; + long len; int space_left; __be32 nfserr; __be32 *p = xdr->p - 2; @@ -3350,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read( if (xdr->end - xdr->p < 1) return nfserr_resource; + len = maxcount; nfserr = nfsd_splice_read(read->rd_rqstp, file, read->rd_offset, &maxcount); if (nfserr) { @@ -3362,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read( return nfserr; } - eof = (read->rd_offset + maxcount >= - d_inode(read->rd_fhp->fh_dentry)->i_size); + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset, + d_inode(read->rd_fhp->fh_dentry)->i_size); *(p++) = htonl(eof); *(p++) = htonl(maxcount); @@ -3433,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, } read->rd_vlen = v; + len = maxcount; nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); if (nfserr) return nfserr; xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); - eof = (read->rd_offset + maxcount >= - d_inode(read->rd_fhp->fh_dentry)->i_size); + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset, + d_inode(read->rd_fhp->fh_dentry)->i_size); tmp = htonl(eof); write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); @@ -4292,6 +4318,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, + [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, }; /* diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 46ec934f5dee..54cde9a5864e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -63,7 +63,6 @@ static unsigned int longest_chain; static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -static void cache_cleaner_func(struct work_struct *unused); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, @@ -76,13 +75,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { }; /* - * locking for the reply cache: - * A cache entry is "single use" if c_state == RC_INPROG - * Otherwise, it when accessing _prev or _next, the lock must be held. - */ -static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); - -/* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. * @@ -203,7 +195,6 @@ void nfsd_reply_cache_shutdown(void) unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); - cancel_delayed_work_sync(&cache_cleaner); for (i = 0; i < drc_hashsize; i++) { struct list_head *head = &drc_hashtbl[i].lru_head; @@ -217,10 +208,8 @@ void nfsd_reply_cache_shutdown(void) drc_hashtbl = NULL; drc_hashsize = 0; - if (drc_slab) { - kmem_cache_destroy(drc_slab); - drc_slab = NULL; - } + kmem_cache_destroy(drc_slab); + drc_slab = NULL; } /* @@ -232,7 +221,6 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &b->lru_head); - schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } static long @@ -266,7 +254,6 @@ prune_cache_entries(void) { unsigned int i; long freed = 0; - bool cancel = true; for (i = 0; i < drc_hashsize; i++) { struct nfsd_drc_bucket *b = &drc_hashtbl[i]; @@ -275,26 +262,11 @@ prune_cache_entries(void) continue; spin_lock(&b->cache_lock); freed += prune_bucket(b); - if (!list_empty(&b->lru_head)) - cancel = false; spin_unlock(&b->cache_lock); } - - /* - * Conditionally rearm the job to run in RC_EXPIRE since we just - * ran the pruner. - */ - if (!cancel) - mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); return freed; } -static void -cache_cleaner_func(struct work_struct *unused) -{ - prune_cache_entries(); -} - static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 350041a40fe5..c1681ce894c5 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp) fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); -#ifdef CONFIG_NFSD_V3 - fhp->fh_pre_saved = 0; - fhp->fh_post_saved = 0; -#endif + fh_clear_wcc(fhp); } fh_drop_write(fhp); if (exp) { diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 1e90dad4926b..f84fe6bf9aee 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -7,6 +7,7 @@ #ifndef _LINUX_NFSD_NFSFH_H #define _LINUX_NFSD_NFSFH_H +#include <linux/crc32.h> #include <linux/sunrpc/svc.h> #include <uapi/linux/nfsd/nfsfh.h> @@ -26,16 +27,16 @@ static inline ino_t u32_to_ino_t(__u32 uino) */ typedef struct svc_fh { struct knfsd_fh fh_handle; /* FH data */ + int fh_maxsize; /* max size for fh_handle */ struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ + bool fh_locked; /* inode locked by us */ + bool fh_want_write; /* remount protection taken */ #ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ + bool fh_post_saved; /* post-op attrs saved */ + bool fh_pre_saved; /* pre-op attrs saved */ /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ @@ -205,6 +206,28 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) return true; } +#ifdef CONFIG_CRC32 +/** + * knfsd_fh_hash - calculate the crc32 hash for the filehandle + * @fh - pointer to filehandle + * + * returns a crc32 hash for the filehandle that is compatible with + * the one displayed by "wireshark". + */ + +static inline u32 +knfsd_fh_hash(struct knfsd_fh *fh) +{ + return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); +} +#else +static inline u32 +knfsd_fh_hash(struct knfsd_fh *fh) +{ + return 0; +} +#endif + #ifdef CONFIG_NFSD_V3 /* * The wcc data stored in current_fh should be cleared @@ -213,8 +236,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) static inline void fh_clear_wcc(struct svc_fh *fhp) { - fhp->fh_post_saved = 0; - fhp->fh_pre_saved = 0; + fhp->fh_post_saved = false; + fhp->fh_pre_saved = false; } /* @@ -231,7 +254,7 @@ fill_pre_wcc(struct svc_fh *fhp) fhp->fh_pre_ctime = inode->i_ctime; fhp->fh_pre_size = inode->i_size; fhp->fh_pre_change = inode->i_version; - fhp->fh_pre_saved = 1; + fhp->fh_pre_saved = true; } } @@ -265,9 +288,9 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) } inode = d_inode(dentry); - mutex_lock_nested(&inode->i_mutex, subclass); + inode_lock_nested(inode, subclass); fill_pre_wcc(fhp); - fhp->fh_locked = 1; + fhp->fh_locked = true; } static inline void @@ -284,8 +307,8 @@ fh_unlock(struct svc_fh *fhp) { if (fhp->fh_locked) { fill_post_wcc(fhp); - mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex); - fhp->fh_locked = 0; + inode_unlock(d_inode(fhp->fh_dentry)); + fhp->fh_locked = false; } } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ad4e2377dd63..45007acaf364 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -14,9 +14,13 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> +#include <linux/sunrpc/svc_xprt.h> #include <linux/lockd/bind.h> #include <linux/nfsacl.h> #include <linux/seq_file.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#include <net/ipv6.h> #include <net/net_namespace.h> #include "nfsd.h" #include "cache.h" @@ -306,22 +310,81 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } +static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct net *net = dev_net(dev); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct sockaddr_in sin; + + if (event != NETDEV_DOWN) + goto out; + + if (nn->nfsd_serv) { + dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = ifa->ifa_local; + svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block nfsd_inetaddr_notifier = { + .notifier_call = nfsd_inetaddr_event, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static int nfsd_inet6addr_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct net_device *dev = ifa->idev->dev; + struct net *net = dev_net(dev); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct sockaddr_in6 sin6; + + if (event != NETDEV_DOWN) + goto out; + + if (nn->nfsd_serv) { + dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = ifa->addr; + svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block nfsd_inet6addr_notifier = { + .notifier_call = nfsd_inet6addr_event, +}; +#endif + static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); +#endif /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are * started, then nfsd_last_thread will be run before any of this - * other initialization has been done. + * other initialization has been done except the rpcb information. */ + svc_rpcb_cleanup(serv, net); if (!nn->nfsd_net_up) return; - nfsd_shutdown_net(net); - - svc_rpcb_cleanup(serv, net); + nfsd_shutdown_net(net); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); nfsd_export_flush(net); @@ -425,6 +488,10 @@ int nfsd_create_serv(struct net *net) } set_max_drc(); + register_inetaddr_notifier(&nfsd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + register_inet6addr_notifier(&nfsd_inet6addr_notifier); +#endif do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ return 0; } diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index d4c4453674c6..7d073b9b1553 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h @@ -21,6 +21,7 @@ struct nfsd4_layout_ops { u32 notify_types; __be32 (*proc_getdeviceinfo)(struct super_block *sb, + struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdevp); __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, struct nfsd4_getdeviceinfo *gdevp); @@ -32,10 +33,17 @@ struct nfsd4_layout_ops { __be32 (*proc_layoutcommit)(struct inode *inode, struct nfsd4_layoutcommit *lcp); + + void (*fence_client)(struct nfs4_layout_stateid *ls); }; extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; +#ifdef CONFIG_NFSD_BLOCKLAYOUT extern const struct nfsd4_layout_ops bl_layout_ops; +#endif +#ifdef CONFIG_NFSD_SCSILAYOUT +extern const struct nfsd4_layout_ops scsi_layout_ops; +#endif __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 583ffc13cae2..c050c53036a6 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -65,7 +65,7 @@ struct nfsd4_callback { struct nfs4_client *cb_clp; u32 cb_minorversion; struct rpc_message cb_msg; - struct nfsd4_callback_ops *cb_ops; + const struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; int cb_seq_status; int cb_status; @@ -84,7 +84,7 @@ struct nfsd4_callback_ops { * fields that are of general use to any stateid. */ struct nfs4_stid { - atomic_t sc_count; + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -94,11 +94,12 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 - unsigned char sc_type; - stateid_t sc_stateid; - struct nfs4_client *sc_client; - struct nfs4_file *sc_file; - void (*sc_free)(struct nfs4_stid *); + unsigned char sc_type; + stateid_t sc_stateid; + spinlock_t sc_lock; + struct nfs4_client *sc_client; + struct nfs4_file *sc_file; + void (*sc_free)(struct nfs4_stid *); }; /* @@ -364,15 +365,6 @@ struct nfs4_client_reclaim { char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; -static inline void -update_stateid(stateid_t *stateid) -{ - stateid->si_generation++; - /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ - if (stateid->si_generation == 0) - stateid->si_generation = 1; -} - /* A reasonable value for REPLAY_ISIZE was estimated as follows: * The OPEN response, typically the largest, requires * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + @@ -534,15 +526,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) @@ -561,6 +554,7 @@ struct nfs4_layout_stateid { struct nfsd4_callback ls_recall; stateid_t ls_recall_sid; bool ls_recalled; + struct mutex ls_mutex; }; static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) @@ -584,8 +578,8 @@ struct nfsd4_compound_state; struct nfsd_net; extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, - struct nfsd4_compound_state *cstate, stateid_t *stateid, - int flags, struct file **filp, bool *tmp_file); + struct nfsd4_compound_state *cstate, struct svc_fh *fhp, + stateid_t *stateid, int flags, struct file **filp, bool *tmp_file); __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); @@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); +void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, @@ -604,7 +599,7 @@ extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, - struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); + const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); extern void nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index 82f89070594c..90967466a1e5 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,5 +1,3 @@ -#include "state.h" - #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c668520c344b..3287041905da 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -8,6 +8,49 @@ #define _NFSD_TRACE_H #include <linux/tracepoint.h> +#include "nfsfh.h" + +DECLARE_EVENT_CLASS(nfsd_io_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, + loff_t offset, + int len), + TP_ARGS(rqstp, fhp, offset, len), + TP_STRUCT__entry( + __field(__be32, xid) + __field_struct(struct knfsd_fh, fh) + __field(loff_t, offset) + __field(int, len) + ), + TP_fast_assign( + __entry->xid = rqstp->rq_xid, + fh_copy_shallow(&__entry->fh, &fhp->fh_handle); + __entry->offset = offset; + __entry->len = len; + ), + TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d", + __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh), + __entry->offset, __entry->len) +) + +#define DEFINE_NFSD_IO_EVENT(name) \ +DEFINE_EVENT(nfsd_io_class, name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + loff_t offset, \ + int len), \ + TP_ARGS(rqstp, fhp, offset, len)) + +DEFINE_NFSD_IO_EVENT(read_start); +DEFINE_NFSD_IO_EVENT(read_opened); +DEFINE_NFSD_IO_EVENT(read_io_done); +DEFINE_NFSD_IO_EVENT(read_done); +DEFINE_NFSD_IO_EVENT(write_start); +DEFINE_NFSD_IO_EVENT(write_opened); +DEFINE_NFSD_IO_EVENT(write_io_done); +DEFINE_NFSD_IO_EVENT(write_done); + +#include "state.h" DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 45c04979e7b3..d40010e4f1a9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -36,12 +36,14 @@ #endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 +#include "../internal.h" #include "acl.h" #include "idmap.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" #include "vfs.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_FILEOP @@ -217,10 +219,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; - /* - * check if we have crossed a mount point ... - */ if (nfsd_mountpoint(dentry, exp)) { + /* + * We don't need the i_mutex after all. It's + * still possible we could open this (regular + * files can be mountpoints too), but the + * i_mutex is just there to prevent renames of + * something that we might be about to delegate, + * and a mountpoint won't be renamed: + */ + fh_unlock(fhp); if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { dput(dentry); goto out_nfserr; @@ -485,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; - mutex_lock(&d_inode(dentry)->i_mutex); + inode_lock(d_inode(dentry)); host_error = security_inode_setsecctx(dentry, label->data, label->len); - mutex_unlock(&d_inode(dentry)->i_mutex); + inode_unlock(d_inode(dentry)); return nfserrno(host_error); } #else @@ -498,6 +506,13 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif +__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, + u64 dst_pos, u64 count) +{ + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, + count)); +} + __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, loff_t len, int flags) @@ -855,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0); set_fs(oldfs); return nfsd_finish_read(file, count, host_err); } @@ -942,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0); set_fs(oldfs); if (host_err < 0) goto out_nfserr; @@ -983,16 +998,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct raparms *ra; __be32 err; + trace_read_start(rqstp, fhp, offset, vlen); err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); if (err) return err; ra = nfsd_init_raparms(file); + + trace_read_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + trace_read_io_done(rqstp, fhp, offset, vlen); + if (ra) nfsd_put_raparams(file, ra); fput(file); + trace_read_done(rqstp, fhp, offset, vlen); + return err; } @@ -1008,24 +1030,31 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, { __be32 err = 0; + trace_write_start(rqstp, fhp, offset, vlen); + if (file) { err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; + trace_write_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); + trace_write_io_done(rqstp, fhp, offset, vlen); } else { err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) goto out; + trace_write_opened(rqstp, fhp, offset, vlen); if (cnt) err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); + trace_write_io_done(rqstp, fhp, offset, vlen); fput(file); } out: + trace_write_done(rqstp, fhp, offset, vlen); return err; } @@ -1631,7 +1660,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 1; + ffhp->fh_locked = tfhp->fh_locked = true; fill_pre_wcc(ffhp); fill_pre_wcc(tfhp); @@ -1681,7 +1710,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_post_wcc(ffhp); fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 0; + ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); out: @@ -1809,7 +1838,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, offset = *offsetp; while (1) { - struct inode *dir_inode = file_inode(file); unsigned int reclen; cdp->err = nfserr_eof; /* will be cleared on successful read */ @@ -1828,15 +1856,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, if (!size) break; - /* - * Various filldir functions may end up calling back into - * lookup_one_len() and the file system's ->lookup() method. - * These expect i_mutex to be held, as it would within readdir. - */ - host_err = mutex_lock_killable(&dir_inode->i_mutex); - if (host_err) - break; - de = (struct buffered_dirent *)buf.dirent; while (size > 0) { offset = de->offset; @@ -1853,7 +1872,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, size -= reclen; de = (struct buffered_dirent *)((char *)de + reclen); } - mutex_unlock(&dir_inode->i_mutex); if (size > 0) /* We bailed out early */ break; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fee2451ae248..2d573ec057f8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -56,6 +56,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); +__be32 nfsd4_clone_file_range(struct file *, u64, struct file *, + u64, u64); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, @@ -112,14 +114,14 @@ static inline int fh_want_write(struct svc_fh *fh) int ret = mnt_want_write(fh->fh_export->ex_path.mnt); if (!ret) - fh->fh_want_write = 1; + fh->fh_want_write = true; return ret; } static inline void fh_drop_write(struct svc_fh *fh) { if (fh->fh_want_write) { - fh->fh_want_write = 0; + fh->fh_want_write = false; mnt_drop_write(fh->fh_export->ex_path.mnt); } } @@ -137,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode) || createmode == NFS4_CREATE_EXCLUSIVE4_1; } +static inline bool nfsd_eof_on_read(long requested, long read, + loff_t offset, loff_t size) +{ + /* We assume a short read means eof: */ + if (requested > read) + return true; + /* + * A non-short read might also reach end of file. The spec + * still requires us to set eof in that case. + * + * Further operations may have modified the file size since + * the read, so the following check is not atomic with the read. + * We've only seen that cause a problem for a client in the case + * where the read returned a count of 0 without setting eof. + * That case was fixed by the addition of the above check. + */ + return (offset + read >= size); +} + #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 9f991007a578..d9554813e58a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -491,6 +491,15 @@ struct nfsd4_fallocate { u64 falloc_length; }; +struct nfsd4_clone { + /* request */ + stateid_t cl_src_stateid; + stateid_t cl_dst_stateid; + u64 cl_src_pos; + u64 cl_dst_pos; + u64 cl_count; +}; + struct nfsd4_seek { /* request */ stateid_t seek_stateid; @@ -555,6 +564,7 @@ struct nfsd4_op { /* NFSv4.2 */ struct nfsd4_fallocate allocate; struct nfsd4_fallocate deallocate; + struct nfsd4_clone clone; struct nfsd4_seek seek; } u; struct nfs4_replay * replay; @@ -632,7 +642,7 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = fhp->fh_post_saved; + cinfo->atomic = (u32)fhp->fh_post_saved; cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); cinfo->before_change = fhp->fh_pre_change; |