summaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/Kconfig28
-rw-r--r--fs/nfsd/Makefile4
-rw-r--r--fs/nfsd/blocklayout.c298
-rw-r--r--fs/nfsd/blocklayoutxdr.c77
-rw-r--r--fs/nfsd/blocklayoutxdr.h14
-rw-r--r--fs/nfsd/lockd.c2
-rw-r--r--fs/nfsd/netns.h2
-rw-r--r--fs/nfsd/nfs3proc.c7
-rw-r--r--fs/nfsd/nfs3xdr.c6
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4layouts.c104
-rw-r--r--fs/nfsd/nfs4proc.c80
-rw-r--r--fs/nfsd/nfs4recover.c47
-rw-r--r--fs/nfsd/nfs4state.c364
-rw-r--r--fs/nfsd/nfs4xdr.c55
-rw-r--r--fs/nfsd/nfscache.c32
-rw-r--r--fs/nfsd/nfsfh.c5
-rw-r--r--fs/nfsd/nfsfh.h47
-rw-r--r--fs/nfsd/nfssvc.c75
-rw-r--r--fs/nfsd/pnfs.h8
-rw-r--r--fs/nfsd/state.h51
-rw-r--r--fs/nfsd/trace.c2
-rw-r--r--fs/nfsd/trace.h43
-rw-r--r--fs/nfsd/vfs.c58
-rw-r--r--fs/nfsd/vfs.h25
-rw-r--r--fs/nfsd/xdr4.h12
26 files changed, 1119 insertions, 333 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index a0b77fc1bd39..c9f583d7bac8 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -84,12 +84,30 @@ config NFSD_V4
If unsure, say N.
config NFSD_PNFS
- bool "NFSv4.1 server support for Parallel NFS (pNFS)"
- depends on NFSD_V4
+ bool
+
+config NFSD_BLOCKLAYOUT
+ bool "NFSv4.1 server support for pNFS block layouts"
+ depends on NFSD_V4 && BLOCK
+ select NFSD_PNFS
+ help
+ This option enables support for the exporting pNFS block layouts
+ in the kernel's NFS server. The pNFS block layout enables NFS
+ clients to directly perform I/O to block devices accesible to both
+ the server and the clients. See RFC 5663 for more details.
+
+ If unsure, say N.
+
+config NFSD_SCSILAYOUT
+ bool "NFSv4.1 server support for pNFS SCSI layouts"
+ depends on NFSD_V4 && BLOCK
+ select NFSD_PNFS
help
- This option enables support for the parallel NFS features of the
- minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
- server.
+ This option enables support for the exporting pNFS SCSI layouts
+ in the kernel's NFS server. The pNFS SCSI layout enables NFS
+ clients to directly perform I/O to SCSI devices accesible to both
+ the server and the clients. See draft-ietf-nfsv4-scsi-layout for
+ more details.
If unsure, say N.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9a6028e120c6..3ae5f3c77e28 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
+nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c29d9421bd5e..e55b5242614d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -1,11 +1,14 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/exportfs.h>
#include <linux/genhd.h>
#include <linux/slab.h>
+#include <linux/pr.h>
#include <linux/nfsd/debug.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
#include "blocklayoutxdr.h"
#include "pnfs.h"
@@ -13,37 +16,6 @@
#define NFSDDBG_FACILITY NFSDDBG_PNFS
-static int
-nfsd4_block_get_device_info_simple(struct super_block *sb,
- struct nfsd4_getdeviceinfo *gdp)
-{
- struct pnfs_block_deviceaddr *dev;
- struct pnfs_block_volume *b;
-
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
- if (!dev)
- return -ENOMEM;
- gdp->gd_device = dev;
-
- dev->nr_volumes = 1;
- b = &dev->volumes[0];
-
- b->type = PNFS_BLOCK_VOLUME_SIMPLE;
- b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
- return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
- &b->simple.offset);
-}
-
-static __be32
-nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
- struct nfsd4_getdeviceinfo *gdp)
-{
- if (sb->s_bdev != sb->s_bdev->bd_contains)
- return nfserr_inval;
- return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
-}
-
static __be32
nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
struct nfsd4_layoutget *args)
@@ -141,20 +113,13 @@ out_layoutunavailable:
}
static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
- struct nfsd4_layoutcommit *lcp)
+nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+ struct iomap *iomaps, int nr_iomaps)
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct iomap *iomaps;
- int nr_iomaps;
int error;
- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
-
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
lcp->lc_mtime = current_fs_time(inode->i_sb);
@@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
return nfserrno(error);
}
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
+static int
+nfsd4_block_get_device_info_simple(struct super_block *sb,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_block_deviceaddr *dev;
+ struct pnfs_block_volume *b;
+
+ dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+ sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ gdp->gd_device = dev;
+
+ dev->nr_volumes = 1;
+ b = &dev->volumes[0];
+
+ b->type = PNFS_BLOCK_VOLUME_SIMPLE;
+ b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
+ return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
+ &b->simple.offset);
+}
+
+static __be32
+nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ if (sb->s_bdev != sb->s_bdev->bd_contains)
+ return nfserr_inval;
+ return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
+}
+
+static __be32
+nfsd4_block_proc_layoutcommit(struct inode *inode,
+ struct nfsd4_layoutcommit *lcp)
+{
+ struct iomap *iomaps;
+ int nr_iomaps;
+
+ nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ if (nr_iomaps < 0)
+ return nfserrno(nr_iomaps);
+
+ return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
const struct nfsd4_layout_ops bl_layout_ops = {
/*
* Pretend that we send notification to the client. This is a blatant
@@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = {
.encode_layoutget = nfsd4_block_encode_layoutget,
.proc_layoutcommit = nfsd4_block_proc_layoutcommit,
};
+#endif /* CONFIG_NFSD_BLOCKLAYOUT */
+
+#ifdef CONFIG_NFSD_SCSILAYOUT
+static int nfsd4_scsi_identify_device(struct block_device *bdev,
+ struct pnfs_block_volume *b)
+{
+ struct request_queue *q = bdev->bd_disk->queue;
+ struct request *rq;
+ size_t bufflen = 252, len, id_len;
+ u8 *buf, *d, type, assoc;
+ int error;
+
+ buf = kzalloc(bufflen, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ rq = blk_get_request(q, READ, GFP_KERNEL);
+ if (IS_ERR(rq)) {
+ error = -ENOMEM;
+ goto out_free_buf;
+ }
+ blk_rq_set_block_pc(rq);
+
+ error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
+ if (error)
+ goto out_put_request;
+
+ rq->cmd[0] = INQUIRY;
+ rq->cmd[1] = 1;
+ rq->cmd[2] = 0x83;
+ rq->cmd[3] = bufflen >> 8;
+ rq->cmd[4] = bufflen & 0xff;
+ rq->cmd_len = COMMAND_SIZE(INQUIRY);
+
+ error = blk_execute_rq(rq->q, NULL, rq, 1);
+ if (error) {
+ pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
+ rq->errors);
+ goto out_put_request;
+ }
+
+ len = (buf[2] << 8) + buf[3] + 4;
+ if (len > bufflen) {
+ pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
+ len);
+ goto out_put_request;
+ }
+
+ d = buf + 4;
+ for (d = buf + 4; d < buf + len; d += id_len + 4) {
+ id_len = d[3];
+ type = d[1] & 0xf;
+ assoc = (d[1] >> 4) & 0x3;
+
+ /*
+ * We only care about a EUI-64 and NAA designator types
+ * with LU association.
+ */
+ if (assoc != 0x00)
+ continue;
+ if (type != 0x02 && type != 0x03)
+ continue;
+ if (id_len != 8 && id_len != 12 && id_len != 16)
+ continue;
+
+ b->scsi.code_set = PS_CODE_SET_BINARY;
+ b->scsi.designator_type = type == 0x02 ?
+ PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
+ b->scsi.designator_len = id_len;
+ memcpy(b->scsi.designator, d + 4, id_len);
+
+ /*
+ * If we found a 8 or 12 byte descriptor continue on to
+ * see if a 16 byte one is available. If we find a
+ * 16 byte descriptor we're done.
+ */
+ if (id_len == 16)
+ break;
+ }
+
+out_put_request:
+ blk_put_request(rq);
+out_free_buf:
+ kfree(buf);
+ return error;
+}
+
+#define NFSD_MDS_PR_KEY 0x0100000000000000
+
+/*
+ * We use the client ID as a unique key for the reservations.
+ * This allows us to easily fence a client when recalls fail.
+ */
+static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
+{
+ return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
+}
+
+static int
+nfsd4_block_get_device_info_scsi(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_block_deviceaddr *dev;
+ struct pnfs_block_volume *b;
+ const struct pr_ops *ops;
+ int error;
+
+ dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+ sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ gdp->gd_device = dev;
+
+ dev->nr_volumes = 1;
+ b = &dev->volumes[0];
+
+ b->type = PNFS_BLOCK_VOLUME_SCSI;
+ b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
+
+ error = nfsd4_scsi_identify_device(sb->s_bdev, b);
+ if (error)
+ return error;
+
+ ops = sb->s_bdev->bd_disk->fops->pr_ops;
+ if (!ops) {
+ pr_err("pNFS: device %s does not support PRs.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+ if (error) {
+ pr_err("pNFS: failed to register key for device %s.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+ PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
+ if (error) {
+ pr_err("pNFS: failed to reserve device %s.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static __be32
+nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ if (sb->s_bdev != sb->s_bdev->bd_contains)
+ return nfserr_inval;
+ return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+}
+static __be32
+nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+ struct nfsd4_layoutcommit *lcp)
+{
+ struct iomap *iomaps;
+ int nr_iomaps;
+
+ nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ if (nr_iomaps < 0)
+ return nfserrno(nr_iomaps);
+
+ return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
+static void
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+{
+ struct nfs4_client *clp = ls->ls_stid.sc_client;
+ struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+
+ bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+ nfsd4_scsi_pr_key(clp), 0, true);
+}
+
+const struct nfsd4_layout_ops scsi_layout_ops = {
+ /*
+ * Pretend that we send notification to the client. This is a blatant
+ * lie to force recent Linux clients to cache our device IDs.
+ * We rarely ever change the device ID, so the harm of leaking deviceids
+ * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
+ * in this regard, but I filed errata 4119 for this a while ago, and
+ * hopefully the Linux client will eventually start caching deviceids
+ * without this again.
+ */
+ .notify_types =
+ NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+ .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo,
+ .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
+ .proc_layoutget = nfsd4_block_proc_layoutget,
+ .encode_layoutget = nfsd4_block_encode_layoutget,
+ .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit,
+ .fence_client = nfsd4_scsi_fence_client,
+};
+#endif /* CONFIG_NFSD_SCSILAYOUT */
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 6d834dc9bbc8..6c3b316f932e 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/exportfs.h>
@@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_encode_hyper(p, b->simple.offset);
p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
break;
+ case PNFS_BLOCK_VOLUME_SCSI:
+ len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+ p = xdr_reserve_space(xdr, len);
+ if (!p)
+ return -ETOOSMALL;
+
+ *p++ = cpu_to_be32(b->type);
+ *p++ = cpu_to_be32(b->scsi.code_set);
+ *p++ = cpu_to_be32(b->scsi.designator_type);
+ p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len);
+ p = xdr_encode_hyper(p, b->scsi.pr_key);
+ break;
default:
return -ENOTSUPP;
}
@@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, expected, i;
+ u32 nr_iomaps, i;
if (len < sizeof(u32)) {
dprintk("%s: extent array too small: %u\n", __func__, len);
return -EINVAL;
}
+ len -= sizeof(u32);
+ if (len % PNFS_BLOCK_EXTENT_SIZE) {
+ dprintk("%s: extent array invalid: %u\n", __func__, len);
+ return -EINVAL;
+ }
nr_iomaps = be32_to_cpup(p++);
- expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
- if (len != expected) {
+ if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, expected);
+ __func__, len, nr_iomaps);
return -EINVAL;
}
@@ -155,3 +171,54 @@ fail:
kfree(iomaps);
return -EINVAL;
}
+
+int
+nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+ u32 block_size)
+{
+ struct iomap *iomaps;
+ u32 nr_iomaps, expected, i;
+
+ if (len < sizeof(u32)) {
+ dprintk("%s: extent array too small: %u\n", __func__, len);
+ return -EINVAL;
+ }
+
+ nr_iomaps = be32_to_cpup(p++);
+ expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+ if (len != expected) {
+ dprintk("%s: extent array size mismatch: %u/%u\n",
+ __func__, len, expected);
+ return -EINVAL;
+ }
+
+ iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+ if (!iomaps) {
+ dprintk("%s: failed to allocate extent array\n", __func__);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_iomaps; i++) {
+ u64 val;
+
+ p = xdr_decode_hyper(p, &val);
+ if (val & (block_size - 1)) {
+ dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+ goto fail;
+ }
+ iomaps[i].offset = val;
+
+ p = xdr_decode_hyper(p, &val);
+ if (val & (block_size - 1)) {
+ dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+ goto fail;
+ }
+ iomaps[i].length = val;
+ }
+
+ *iomapp = iomaps;
+ return nr_iomaps;
+fail:
+ kfree(iomaps);
+ return -EINVAL;
+}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 6de925fe8499..397bc7563a49 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -15,6 +15,11 @@ struct pnfs_block_extent {
enum pnfs_block_extent_state es;
};
+struct pnfs_block_range {
+ u64 foff;
+ u64 len;
+};
+
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
* Not actually limited by the protocol.
@@ -29,6 +34,13 @@ struct pnfs_block_volume {
u32 sig_len;
u8 sig[PNFS_BLOCK_UUID_LEN];
} simple;
+ struct {
+ enum scsi_code_set code_set;
+ enum scsi_designator_type designator_type;
+ int designator_len;
+ u8 designator[256];
+ u64 pr_key;
+ } scsi;
};
};
@@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
struct nfsd4_layoutget *lgp);
int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+ u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 77e7a5cca888..1a03bc3059e8 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -58,7 +58,7 @@ nlm_fclose(struct file *filp)
fput(filp);
}
-static struct nlmsvc_binding nfsd_nlm_ops = {
+static const struct nlmsvc_binding nfsd_nlm_ops = {
.fopen = nlm_fopen, /* open file for locking */
.fclose = nlm_fclose, /* close file */
};
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d8b16c2568f3..5fbf3bbd00d0 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -92,7 +92,7 @@ struct nfsd_net {
struct file *rec_file;
bool in_grace;
- struct nfsd4_client_tracking_ops *client_tracking_ops;
+ const struct nfsd4_client_tracking_ops *client_tracking_ops;
time_t nfsd4_lease;
time_t nfsd4_grace;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7f785c..51c3b06e8036 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
{
__be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned long cnt = min(argp->count, max_blocksize);
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- resp->count = min(argp->count, max_blocksize);
+ resp->count = cnt;
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
&resp->count);
if (nfserr == 0) {
struct inode *inode = d_inode(resp->fh.fh_dentry);
-
- resp->eof = (argp->offset + resp->count) >= inode->i_size;
+ resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
+ inode->i_size);
}
RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f6e7cbabac5a..2246454dec76 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp)
err = fh_getattr(fhp, &fhp->fh_post_attr);
fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
if (err) {
- fhp->fh_post_saved = 0;
+ fhp->fh_post_saved = false;
/* Grab the ctime anyway - set_change_info might use it */
fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
} else
- fhp->fh_post_saved = 1;
+ fhp->fh_post_saved = true;
}
/*
@@ -823,7 +823,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_one_len(name, dparent, namlen);
+ dchild = lookup_one_len_unlocked(name, dparent, namlen);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e7f50c4081d6..7389cb1d7409 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -792,12 +792,16 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
{
+ if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
+ return;
clp->cl_cb_state = NFSD4_CB_DOWN;
warn_no_callback_path(clp, reason);
}
static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
{
+ if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
+ return;
clp->cl_cb_state = NFSD4_CB_FAULT;
warn_no_callback_path(clp, reason);
}
@@ -1143,7 +1147,7 @@ nfsd4_run_cb_work(struct work_struct *work)
}
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
- struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
+ const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
{
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ebf90e487c75..825c7bc8d789 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
+#include <linux/blkdev.h>
#include <linux/kmod.h>
#include <linux/file.h>
#include <linux/jhash.h>
@@ -22,11 +23,16 @@ struct nfs4_layout {
static struct kmem_cache *nfs4_layout_cache;
static struct kmem_cache *nfs4_layout_stateid_cache;
-static struct nfsd4_callback_ops nfsd4_cb_layout_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lock_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
[LAYOUT_BLOCK_VOLUME] = &bl_layout_ops,
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ [LAYOUT_SCSI] = &scsi_layout_ops,
+#endif
};
/* pNFS device ID to export fsid mapping */
@@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
if (!(exp->ex_flags & NFSEXP_PNFS))
return;
+ /*
+ * Check if the file system supports exporting a block-like layout.
+ * If the block device supports reservations prefer the SCSI layout,
+ * otherwise advertise the block layout.
+ */
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ /* overwrite block layout selection if needed */
+ if (sb->s_export_op->map_blocks &&
+ sb->s_export_op->commit_blocks &&
+ sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+ exp->ex_layout_type = LAYOUT_SCSI;
+#endif
}
static void
@@ -201,6 +221,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
INIT_LIST_HEAD(&ls->ls_perfile);
spin_lock_init(&ls->ls_lock);
INIT_LIST_HEAD(&ls->ls_layouts);
+ mutex_init(&ls->ls_mutex);
ls->ls_layout_type = layout_type;
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
@@ -262,19 +283,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
status = nfserr_jukebox;
if (!ls)
goto out;
+ mutex_lock(&ls->ls_mutex);
} else {
ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
status = nfserr_bad_stateid;
+ mutex_lock(&ls->ls_mutex);
if (stateid->si_generation > stid->sc_stateid.si_generation)
- goto out_put_stid;
+ goto out_unlock_stid;
if (layout_type != ls->ls_layout_type)
- goto out_put_stid;
+ goto out_unlock_stid;
}
*lsp = ls;
return 0;
+out_unlock_stid:
+ mutex_unlock(&ls->ls_mutex);
out_put_stid:
nfs4_put_stid(stid);
out:
@@ -296,8 +321,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
trace_layout_recall(&ls->ls_stid.sc_stateid);
atomic_inc(&ls->ls_stid.sc_count);
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
nfsd4_run_cb(&ls->ls_recall);
out_unlock:
@@ -406,8 +429,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
list_add_tail(&new->lo_perstate, &ls->ls_layouts);
new = NULL;
done:
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid);
spin_unlock(&ls->ls_lock);
out:
spin_unlock(&fp->fi_lock);
@@ -481,11 +503,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
}
}
if (!list_empty(&ls->ls_layouts)) {
- if (found) {
- update_stateid(&ls->ls_stid.sc_stateid);
- memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
- sizeof(stateid_t));
- }
+ if (found)
+ nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
lrp->lrs_present = 1;
} else {
trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
@@ -494,6 +513,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
}
spin_unlock(&ls->ls_lock);
+ mutex_unlock(&ls->ls_mutex);
nfs4_put_stid(&ls->ls_stid);
nfsd4_free_layouts(&reaplist);
return nfs_ok;
@@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
- trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
printk(KERN_WARNING
"nfsd: client %s failed to respond to layout recall. "
" Fencing..\n", addr_str);
@@ -608,34 +626,67 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
}
}
+static void
+nfsd4_cb_layout_prepare(struct nfsd4_callback *cb)
+{
+ struct nfs4_layout_stateid *ls =
+ container_of(cb, struct nfs4_layout_stateid, ls_recall);
+
+ mutex_lock(&ls->ls_mutex);
+ nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid);
+ mutex_unlock(&ls->ls_mutex);
+}
+
static int
nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_layout_stateid *ls =
container_of(cb, struct nfs4_layout_stateid, ls_recall);
+ struct nfsd_net *nn;
+ ktime_t now, cutoff;
+ const struct nfsd4_layout_ops *ops;
LIST_HEAD(reaplist);
+
switch (task->tk_status) {
case 0:
- return 1;
+ case -NFS4ERR_DELAY:
+ /*
+ * Anything left? If not, then call it done. Note that we don't
+ * take the spinlock since this is an optimization and nothing
+ * should get added until the cb counter goes to zero.
+ */
+ if (list_empty(&ls->ls_layouts))
+ return 1;
+
+ /* Poll the client until it's done with the layout */
+ now = ktime_get();
+ nn = net_generic(ls->ls_stid.sc_client->net, nfsd_net_id);
+
+ /* Client gets 2 lease periods to return it */
+ cutoff = ktime_add_ns(task->tk_start,
+ nn->nfsd4_lease * NSEC_PER_SEC * 2);
+
+ if (ktime_before(now, cutoff)) {
+ rpc_delay(task, HZ/100); /* 10 mili-seconds */
+ return 0;
+ }
+ /* Fallthrough */
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
return 1;
- case -NFS4ERR_DELAY:
- /* Poll the client until it's done with the layout */
- /* FIXME: cap number of retries.
- * The pnfs standard states that we need to only expire
- * the client after at-least "lease time" .eg lease-time * 2
- * when failing to communicate a recall
- */
- rpc_delay(task, HZ/100); /* 10 mili-seconds */
- return 0;
default:
/*
* Unknown error or non-responding client, we'll need to fence.
*/
- nfsd4_cb_layout_fail(ls);
+ trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+
+ ops = nfsd4_layout_ops[ls->ls_layout_type];
+ if (ops->fence_client)
+ ops->fence_client(ls);
+ else
+ nfsd4_cb_layout_fail(ls);
return -1;
}
}
@@ -654,7 +705,8 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
nfs4_put_stid(&ls->ls_stid);
}
-static struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
+static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
+ .prepare = nfsd4_cb_layout_prepare,
.done = nfsd4_cb_layout_done,
.release = nfsd4_cb_layout_release,
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4ce6b97b31ad..de1ff1d98bb1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u
struct inode *inode = d_inode(resfh->fh_dentry);
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = security_inode_setsecctx(resfh->fh_dentry,
label->data, label->len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (status)
/*
@@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
- status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
- RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &read->rd_stateid, RD_STATE,
+ &read->rd_filp, &read->rd_tmp_file);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -863,12 +864,10 @@ static __be32
nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_secinfo *secinfo)
{
- struct svc_fh resfh;
struct svc_export *exp;
struct dentry *dentry;
__be32 err;
- fh_init(&resfh, NFS4_FHSIZE);
err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
@@ -877,6 +876,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&exp, &dentry);
if (err)
return err;
+ fh_unlock(&cstate->current_fh);
if (d_really_is_negative(dentry)) {
exp_put(exp);
err = nfserr_noent;
@@ -921,7 +921,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
- &setattr->sa_stateid, WR_STATE, NULL, NULL);
+ &cstate->current_fh, &setattr->sa_stateid,
+ WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -985,8 +986,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
- status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
- &filp, NULL);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ stateid, WR_STATE, &filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
@@ -1010,13 +1011,54 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
static __be32
+nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_clone *clone)
+{
+ struct file *src, *dst;
+ __be32 status;
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
+ &clone->cl_src_stateid, RD_STATE,
+ &src, NULL);
+ if (status) {
+ dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
+ goto out;
+ }
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &clone->cl_dst_stateid, WR_STATE,
+ &dst, NULL);
+ if (status) {
+ dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
+ goto out_put_src;
+ }
+
+ /* fix up for NFS-specific error code */
+ if (!S_ISREG(file_inode(src)->i_mode) ||
+ !S_ISREG(file_inode(dst)->i_mode)) {
+ status = nfserr_wrong_type;
+ goto out_put_dst;
+ }
+
+ status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ dst, clone->cl_dst_pos, clone->cl_count);
+
+out_put_dst:
+ fput(dst);
+out_put_src:
+ fput(src);
+out:
+ return status;
+}
+
+static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status = nfserr_notsupp;
struct file *file;
- status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
WR_STATE, &file, NULL);
if (status != nfs_ok) {
@@ -1055,7 +1097,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct file *file;
- status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
RD_STATE, &file, NULL);
if (status) {
@@ -1226,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
goto out;
nfserr = nfs_ok;
- if (gdp->gd_maxcount != 0)
- nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+ if (gdp->gd_maxcount != 0) {
+ nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
+ cstate->session->se_client, gdp);
+ }
gdp->gd_notify_types &= ops->notify_types;
out:
@@ -1309,6 +1353,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
nfserr = nfsd4_insert_layout(lgp, ls);
out_put_stid:
+ mutex_unlock(&ls->ls_mutex);
nfs4_put_stid(&ls->ls_stid);
out:
return nfserr;
@@ -1362,6 +1407,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
goto out;
}
+ /* LAYOUTCOMMIT does not require any serialization */
+ mutex_unlock(&ls->ls_mutex);
+
if (new_size > i_size_read(inode)) {
lcp->lc_size_chg = 1;
lcp->lc_newsize = new_size;
@@ -2275,6 +2323,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
+ [OP_CLONE] = {
+ .op_func = (nfsd4op_func)nfsd4_clone,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_CLONE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+ },
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e3d47091b191..66eaeb1e8c2c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -32,10 +32,10 @@
*
*/
+#include <crypto/hash.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
-#include <linux/crypto.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/module.h>
@@ -104,29 +104,35 @@ static int
nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
{
struct xdr_netobj cksum;
- struct hash_desc desc;
- struct scatterlist sg;
+ struct crypto_shash *tfm;
int status;
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm)) {
- status = PTR_ERR(desc.tfm);
+ tfm = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(tfm)) {
+ status = PTR_ERR(tfm);
goto out_no_tfm;
}
- cksum.len = crypto_hash_digestsize(desc.tfm);
+ cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL) {
status = -ENOMEM;
goto out;
}
- sg_init_one(&sg, clname->data, clname->len);
+ {
+ SHASH_DESC_ON_STACK(desc, tfm);
+
+ desc->tfm = tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ status = crypto_shash_digest(desc, clname->data, clname->len,
+ cksum.data);
+ shash_desc_zero(desc);
+ }
- status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data);
if (status)
goto out;
@@ -135,7 +141,7 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
status = 0;
out:
kfree(cksum.data);
- crypto_free_hash(desc.tfm);
+ crypto_free_shash(tfm);
out_no_tfm:
return status;
}
@@ -192,7 +198,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
dir = nn->rec_file->f_path.dentry;
/* lock the parent */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
@@ -213,7 +219,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
out_put:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (status == 0) {
if (nn->in_grace) {
crp = nfs4_client_to_reclaim(dname, nn);
@@ -286,7 +292,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
status = iterate_dir(nn->rec_file, &ctx.ctx);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
@@ -302,7 +308,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
list_del(&entry->list);
kfree(entry);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
nfs4_reset_creds(original_cred);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -322,7 +328,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
dir = nn->rec_file->f_path.dentry;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
dentry = lookup_one_len(name, dir, namlen);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
@@ -335,7 +341,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
out:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return status;
}
@@ -631,7 +637,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
return -ENOENT;
}
-static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
+static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
.init = nfsd4_legacy_tracking_init,
.exit = nfsd4_legacy_tracking_exit,
.create = nfsd4_create_clid_dir,
@@ -1050,7 +1056,7 @@ out_err:
printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
}
-static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
.init = nfsd4_init_cld_pipe,
.exit = nfsd4_remove_cld_pipe,
.create = nfsd4_cld_create,
@@ -1260,6 +1266,7 @@ nfsd4_umh_cltrack_init(struct net *net)
/* XXX: The usermode helper s not working in container yet. */
if (net != &init_net) {
pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
+ kfree(grace_start);
return -EINVAL;
}
@@ -1394,7 +1401,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
kfree(legacy);
}
-static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
+static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
.init = nfsd4_umh_cltrack_init,
.exit = NULL,
.create = nfsd4_umh_cltrack_create,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f1d5691b795..0462eeddfff9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -98,7 +98,7 @@ static struct kmem_cache *odstate_slab;
static void free_session(struct nfsd4_session *);
-static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static bool is_session_dead(struct nfsd4_session *ses)
{
@@ -575,6 +575,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
/* Will be incremented before return to client: */
atomic_set(&stid->sc_count, 1);
+ spin_lock_init(&stid->sc_lock);
/*
* It shouldn't be a problem to reuse an opaque stateid value.
@@ -745,6 +746,18 @@ nfs4_put_stid(struct nfs4_stid *s)
put_nfs4_file(fp);
}
+void
+nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
+{
+ stateid_t *src = &stid->sc_stateid;
+
+ spin_lock(&stid->sc_lock);
+ if (unlikely(++src->si_generation == 0))
+ src->si_generation = 1;
+ memcpy(dst, src, sizeof(*dst));
+ spin_unlock(&stid->sc_lock);
+}
+
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
struct file *filp = NULL;
@@ -765,16 +778,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
s->sc_type = 0;
}
-static void
+/**
+ * nfs4_get_existing_delegation - Discover if this delegation already exists
+ * @clp: a pointer to the nfs4_client we're granting a delegation to
+ * @fp: a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ * On success: NULL if an existing delegation was not found.
+ *
+ * On error: -EAGAIN if one was previously granted to this nfs4_client
+ * for this nfs4_file.
+ *
+ */
+
+static int
+nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+ struct nfs4_delegation *searchdp = NULL;
+ struct nfs4_client *searchclp = NULL;
+
+ lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&fp->fi_lock);
+
+ list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
+ searchclp = searchdp->dl_stid.sc_client;
+ if (clp == searchclp) {
+ return -EAGAIN;
+ }
+ }
+ return 0;
+}
+
+/**
+ * hash_delegation_locked - Add a delegation to the appropriate lists
+ * @dp: a pointer to the nfs4_delegation we are adding.
+ * @fp: a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ * On success: NULL if the delegation was successfully hashed.
+ *
+ * On error: -EAGAIN if one was previously granted to this
+ * nfs4_client for this nfs4_file. Delegation is not hashed.
+ *
+ */
+
+static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
+ int status;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ status = nfs4_get_existing_delegation(clp, fp);
+ if (status)
+ return status;
+ ++fp->fi_delegees;
atomic_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+ list_add(&dp->dl_perclnt, &clp->cl_delegations);
+ return 0;
}
static bool
@@ -1792,15 +1857,28 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}
-static int copy_cred(struct svc_cred *target, struct svc_cred *source)
+int strdup_if_nonnull(char **target, char *source)
{
- if (source->cr_principal) {
- target->cr_principal =
- kstrdup(source->cr_principal, GFP_KERNEL);
- if (target->cr_principal == NULL)
+ if (source) {
+ *target = kstrdup(source, GFP_KERNEL);
+ if (!*target)
return -ENOMEM;
} else
- target->cr_principal = NULL;
+ *target = NULL;
+ return 0;
+}
+
+static int copy_cred(struct svc_cred *target, struct svc_cred *source)
+{
+ int ret;
+
+ ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal);
+ if (ret)
+ return ret;
+ ret = strdup_if_nonnull(&target->cr_raw_principal,
+ source->cr_raw_principal);
+ if (ret)
+ return ret;
target->cr_flavor = source->cr_flavor;
target->cr_uid = source->cr_uid;
target->cr_gid = source->cr_gid;
@@ -1904,6 +1982,9 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
return false;
if (!svc_rqst_integrity_protected(rqstp))
return false;
+ if (cl->cl_cred.cr_raw_principal)
+ return 0 == strcmp(cl->cl_cred.cr_raw_principal,
+ cr->cr_raw_principal);
if (!cr->cr_principal)
return false;
return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
@@ -2175,7 +2256,8 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
base = resp->cstate.data_offset;
slot->sl_datalen = buf->len - base;
if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
- WARN("%s: sessions DRC could not cache compound\n", __func__);
+ WARN(1, "%s: sessions DRC could not cache compound\n",
+ __func__);
return;
}
@@ -2256,15 +2338,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
clid->flags = new->cl_exchange_flags;
}
+static bool client_has_openowners(struct nfs4_client *clp)
+{
+ struct nfs4_openowner *oo;
+
+ list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) {
+ if (!list_empty(&oo->oo_owner.so_stateids))
+ return true;
+ }
+ return false;
+}
+
static bool client_has_state(struct nfs4_client *clp)
{
- /*
- * Note clp->cl_openowners check isn't quite right: there's no
- * need to count owners without stateid's.
- *
- * Also note we should probably be using this in 4.0 case too.
- */
- return !list_empty(&clp->cl_openowners)
+ return client_has_openowners(clp)
#ifdef CONFIG_NFSD_PNFS
|| !list_empty(&clp->cl_lo_states)
#endif
@@ -2295,22 +2382,36 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
return nfserr_inval;
+ new = create_client(exid->clname, rqstp, &verf);
+ if (new == NULL)
+ return nfserr_jukebox;
+
switch (exid->spa_how) {
case SP4_MACH_CRED:
- if (!svc_rqst_integrity_protected(rqstp))
- return nfserr_inval;
+ if (!svc_rqst_integrity_protected(rqstp)) {
+ status = nfserr_inval;
+ goto out_nolock;
+ }
+ /*
+ * Sometimes userspace doesn't give us a principal.
+ * Which is a bug, really. Anyway, we can't enforce
+ * MACH_CRED in that case, better to give up now:
+ */
+ if (!new->cl_cred.cr_principal &&
+ !new->cl_cred.cr_raw_principal) {
+ status = nfserr_serverfault;
+ goto out_nolock;
+ }
+ new->cl_mach_cred = true;
case SP4_NONE:
break;
default: /* checked by xdr code */
WARN_ON_ONCE(1);
case SP4_SSV:
- return nfserr_encr_alg_unsupp;
+ status = nfserr_encr_alg_unsupp;
+ goto out_nolock;
}
- new = create_client(exid->clname, rqstp, &verf);
- if (new == NULL)
- return nfserr_jukebox;
-
/* Cases below refer to rfc 5661 section 18.35.4: */
spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&exid->clname, nn);
@@ -2372,7 +2473,6 @@ out_new:
goto out;
}
new->cl_minorversion = cstate->minorversion;
- new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
gen_clid(new, nn);
add_to_unconfirmed(new);
@@ -2390,6 +2490,7 @@ out_copy:
out:
spin_unlock(&nn->client_lock);
+out_nolock:
if (new)
expire_client(new);
if (unconf)
@@ -2486,21 +2587,26 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
return nfs_ok;
}
+/*
+ * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
+ * These are based on similar macros in linux/sunrpc/msg_prot.h .
+ */
+#define RPC_MAX_HEADER_WITH_AUTH_SYS \
+ (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))
+
+#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \
+ (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK))
+
#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \
- RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
+ RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32))
#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \
- RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
+ RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \
+ sizeof(__be32))
static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
{
ca->headerpadsz = 0;
- /*
- * These RPC_MAX_HEADER macros are overkill, especially since we
- * don't even do gss on the backchannel yet. But this is still
- * less than 1k. Tighten up this estimate in the unlikely event
- * it turns out to be a problem for some client:
- */
if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
return nfserr_toosmall;
if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
@@ -2610,10 +2716,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_conn;
}
status = nfs_ok;
- /*
- * We do not support RDMA or persistent sessions
- */
+ /* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST;
+ /* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
init_session(rqstp, new, conf, cr_ses);
@@ -3049,7 +3154,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Cases below refer to rfc 3530 section 14.2.33: */
spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&clname, nn);
- if (conf) {
+ if (conf && client_has_state(conf)) {
/* case 0: */
status = nfserr_clid_inuse;
if (clp_used_exchangeid(conf))
@@ -3136,6 +3241,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
} else { /* case 3: normal case; new or rebooted client */
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
+ status = nfserr_clid_inuse;
+ if (client_has_state(old)
+ && !same_creds(&unconf->cl_cred,
+ &old->cl_cred))
+ goto out;
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
@@ -3317,6 +3427,27 @@ static const struct nfs4_stateowner_operations openowner_ops = {
.so_free = nfs4_free_openowner,
};
+static struct nfs4_ol_stateid *
+nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+{
+ struct nfs4_ol_stateid *local, *ret = NULL;
+ struct nfs4_openowner *oo = open->op_openowner;
+
+ lockdep_assert_held(&fp->fi_lock);
+
+ list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
+ /* ignore lock owners */
+ if (local->st_stateowner->so_is_open_owner == 0)
+ continue;
+ if (local->st_stateowner == &oo->oo_owner) {
+ ret = local;
+ atomic_inc(&ret->st_stid.sc_count);
+ break;
+ }
+ }
+ return ret;
+}
+
static struct nfs4_openowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
struct nfsd4_compound_state *cstate)
@@ -3348,9 +3479,20 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
return ret;
}
-static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
+static struct nfs4_ol_stateid *
+init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
+ struct nfsd4_open *open)
+{
+
struct nfs4_openowner *oo = open->op_openowner;
+ struct nfs4_ol_stateid *retstp = NULL;
+ spin_lock(&oo->oo_owner.so_client->cl_lock);
+ spin_lock(&fp->fi_lock);
+
+ retstp = nfsd4_find_existing_open(fp, open);
+ if (retstp)
+ goto out_unlock;
atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
@@ -3360,12 +3502,14 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
- spin_lock(&oo->oo_owner.so_client->cl_lock);
+ init_rwsem(&stp->st_rwsem);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
- spin_lock(&fp->fi_lock);
list_add(&stp->st_perfile, &fp->fi_stateids);
+
+out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
+ return retstp;
}
/*
@@ -3539,7 +3683,7 @@ static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
nfs4_put_stid(&dp->dl_stid);
}
-static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
+static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
.prepare = nfsd4_cb_recall_prepare,
.done = nfsd4_cb_recall_done,
.release = nfsd4_cb_recall_release,
@@ -3776,27 +3920,6 @@ out:
return nfs_ok;
}
-static struct nfs4_ol_stateid *
-nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
-{
- struct nfs4_ol_stateid *local, *ret = NULL;
- struct nfs4_openowner *oo = open->op_openowner;
-
- spin_lock(&fp->fi_lock);
- list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
- /* ignore lock owners */
- if (local->st_stateowner->so_is_open_owner == 0)
- continue;
- if (local->st_stateowner == &oo->oo_owner) {
- ret = local;
- atomic_inc(&ret->st_stid.sc_count);
- break;
- }
- }
- spin_unlock(&fp->fi_lock);
- return ret;
-}
-
static inline int nfs4_access_to_access(u32 nfs4_access)
{
int flags = 0;
@@ -3945,6 +4068,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
return fl;
}
+/**
+ * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
+ * @dp: a pointer to the nfs4_delegation we're adding.
+ *
+ * Return:
+ * On success: Return code will be 0 on success.
+ *
+ * On error: -EAGAIN if there was an existing delegation.
+ * nonzero if there is an error in other cases.
+ *
+ */
+
static int nfs4_setlease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -3976,16 +4111,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
goto out_unlock;
/* Race breaker */
if (fp->fi_deleg_file) {
- status = 0;
- ++fp->fi_delegees;
- hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
goto out_unlock;
}
fp->fi_deleg_file = filp;
- fp->fi_delegees = 1;
- hash_delegation_locked(dp, fp);
+ fp->fi_delegees = 0;
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
+ if (status) {
+ /* Should never happen, this is a new fi_deleg_file */
+ WARN_ON_ONCE(1);
+ goto out_fput;
+ }
return 0;
out_unlock:
spin_unlock(&fp->fi_lock);
@@ -4005,6 +4143,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ status = nfs4_get_existing_delegation(clp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+ if (status)
+ return ERR_PTR(status);
+
dp = alloc_init_deleg(clp, fh, odstate);
if (!dp)
return ERR_PTR(-ENOMEM);
@@ -4023,9 +4170,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
status = -EAGAIN;
goto out_unlock;
}
- ++fp->fi_delegees;
- hash_delegation_locked(dp, fp);
- status = 0;
+ status = hash_delegation_locked(dp, fp);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
@@ -4160,6 +4305,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
struct nfs4_file *fp = NULL;
struct nfs4_ol_stateid *stp = NULL;
+ struct nfs4_ol_stateid *swapstp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
@@ -4173,7 +4319,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ spin_lock(&fp->fi_lock);
stp = nfsd4_find_existing_open(fp, open);
+ spin_unlock(&fp->fi_lock);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
@@ -4187,15 +4335,32 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
+ down_read(&stp->st_rwsem);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
- if (status)
+ if (status) {
+ up_read(&stp->st_rwsem);
goto out;
+ }
} else {
stp = open->op_stp;
open->op_stp = NULL;
- init_open_stateid(stp, fp, open);
+ swapstp = init_open_stateid(stp, fp, open);
+ if (swapstp) {
+ nfs4_put_stid(&stp->st_stid);
+ stp = swapstp;
+ down_read(&stp->st_rwsem);
+ status = nfs4_upgrade_open(rqstp, fp, current_fh,
+ stp, open);
+ if (status) {
+ up_read(&stp->st_rwsem);
+ goto out;
+ }
+ goto upgrade_out;
+ }
+ down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
+ up_read(&stp->st_rwsem);
release_open_stateid(stp);
goto out;
}
@@ -4205,8 +4370,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+upgrade_out:
+ nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
+ up_read(&stp->st_rwsem);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4410,8 +4576,7 @@ static void
laundromat_main(struct work_struct *laundry)
{
time_t t;
- struct delayed_work *dwork = container_of(laundry, struct delayed_work,
- work);
+ struct delayed_work *dwork = to_delayed_work(laundry);
struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
laundromat_work);
@@ -4666,10 +4831,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
- struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filpp, bool *tmp_file)
+ struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+ stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
{
- struct svc_fh *fhp = &cstate->current_fh;
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -4819,10 +4983,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
+ down_write(&stp->st_rwsem);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
- if (status)
- return status;
- return nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status == nfs_ok)
+ status = nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status != nfs_ok)
+ up_write(&stp->st_rwsem);
+ return status;
}
/*
@@ -4869,6 +5036,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -4899,11 +5067,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
- if (oo->oo_flags & NFS4_OO_CONFIRMED)
+ if (oo->oo_flags & NFS4_OO_CONFIRMED) {
+ up_write(&stp->st_rwsem);
goto put_stateid;
+ }
oo->oo_flags |= NFS4_OO_CONFIRMED;
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
+ up_write(&stp->st_rwsem);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -4975,13 +5145,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
goto put_stateid;
}
nfs4_stateid_downgrade(stp, od->od_share_access);
-
reset_union_bmap_deny(od->od_share_deny, stp);
-
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
status = nfs_ok;
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5033,8 +5201,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+ up_write(&stp->st_rwsem);
nfsd4_close_open_stateid(stp);
@@ -5260,6 +5428,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
+ init_rwsem(&stp->st_rwsem);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5428,6 +5597,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
+ up_write(&open_stp->st_rwsem);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5435,6 +5605,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
+ if (status == nfs_ok)
+ down_write(&lock_stp->st_rwsem);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5512,9 +5684,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
switch (-err) {
case 0: /* success! */
- update_stateid(&lock_stp->st_stid.sc_stateid);
- memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
- sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0;
break;
case (EAGAIN): /* conflock holds conflicting lock */
@@ -5540,6 +5710,8 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
+ up_write(&lock_stp->st_rwsem);
+
/*
* If this is a new, never-before-used stateid, and we are
* returning an error, then just go ahead and release it.
@@ -5704,11 +5876,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
- update_stateid(&stp->st_stid.sc_stateid);
- memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
fput:
fput(filp);
put_stateid:
+ up_write(&stp->st_rwsem);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..9df898ba648f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1072,8 +1072,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
READ_BUF(4);
rename->rn_snamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_snamelen + 4);
+ READ_BUF(rename->rn_snamelen);
SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+ READ_BUF(4);
rename->rn_tnamelen = be32_to_cpup(p++);
READ_BUF(rename->rn_tnamelen);
SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
@@ -1155,13 +1156,14 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
READ_BUF(8);
setclientid->se_callback_prog = be32_to_cpup(p++);
setclientid->se_callback_netid_len = be32_to_cpup(p++);
-
- READ_BUF(setclientid->se_callback_netid_len + 4);
+ READ_BUF(setclientid->se_callback_netid_len);
SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+ READ_BUF(4);
setclientid->se_callback_addr_len = be32_to_cpup(p++);
- READ_BUF(setclientid->se_callback_addr_len + 4);
+ READ_BUF(setclientid->se_callback_addr_len);
SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+ READ_BUF(4);
setclientid->se_callback_ident = be32_to_cpup(p++);
DECODE_TAIL;
@@ -1675,6 +1677,25 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
}
static __be32
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+{
+ DECODE_HEAD;
+
+ status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
+ if (status)
+ return status;
+ status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
+ if (status)
+ return status;
+
+ READ_BUF(8 + 8 + 8);
+ p = xdr_decode_hyper(p, &clone->cl_src_pos);
+ p = xdr_decode_hyper(p, &clone->cl_dst_pos);
+ p = xdr_decode_hyper(p, &clone->cl_count);
+ DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1785,6 +1806,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
};
static inline bool
@@ -1815,8 +1837,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
READ_BUF(4);
argp->taglen = be32_to_cpup(p++);
- READ_BUF(argp->taglen + 8);
+ READ_BUF(argp->taglen);
SAVEMEM(argp->tag, argp->taglen);
+ READ_BUF(8);
argp->minorversion = be32_to_cpup(p++);
argp->opcnt = be32_to_cpup(p++);
max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
@@ -2838,14 +2861,14 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
if (d_really_is_negative(dentry)) {
/*
- * nfsd_buffered_readdir drops the i_mutex between
- * readdir and calling this callback, leaving a window
- * where this directory entry could have gone away.
+ * we're not holding the i_mutex here, so there's
+ * a window where this directory entry could have gone
+ * away.
*/
dput(dentry);
return nfserr_noent;
@@ -3040,7 +3063,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
NFS4_MAX_SESSIONID_LEN);
*p++ = cpu_to_be32(bcts->dir);
- /* Sorry, we do not yet support RDMA over 4.1: */
+ /* Upshifting from TCP to RDMA is not supported */
*p++ = cpu_to_be32(0);
}
return nfserr;
@@ -3342,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read(
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
u32 eof;
+ long len;
int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3350,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read(
if (xdr->end - xdr->p < 1)
return nfserr_resource;
+ len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
read->rd_offset, &maxcount);
if (nfserr) {
@@ -3362,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}
- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3433,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
}
read->rd_vlen = v;
+ len = maxcount;
nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
read->rd_vlen, &maxcount);
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
@@ -4292,6 +4318,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
};
/*
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 46ec934f5dee..54cde9a5864e 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -63,7 +63,6 @@ static unsigned int longest_chain;
static unsigned int longest_chain_cachesize;
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
-static void cache_cleaner_func(struct work_struct *unused);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
@@ -76,13 +75,6 @@ static struct shrinker nfsd_reply_cache_shrinker = {
};
/*
- * locking for the reply cache:
- * A cache entry is "single use" if c_state == RC_INPROG
- * Otherwise, it when accessing _prev or _next, the lock must be held.
- */
-static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
-
-/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
*
@@ -203,7 +195,6 @@ void nfsd_reply_cache_shutdown(void)
unsigned int i;
unregister_shrinker(&nfsd_reply_cache_shrinker);
- cancel_delayed_work_sync(&cache_cleaner);
for (i = 0; i < drc_hashsize; i++) {
struct list_head *head = &drc_hashtbl[i].lru_head;
@@ -217,10 +208,8 @@ void nfsd_reply_cache_shutdown(void)
drc_hashtbl = NULL;
drc_hashsize = 0;
- if (drc_slab) {
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
- }
+ kmem_cache_destroy(drc_slab);
+ drc_slab = NULL;
}
/*
@@ -232,7 +221,6 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &b->lru_head);
- schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
}
static long
@@ -266,7 +254,6 @@ prune_cache_entries(void)
{
unsigned int i;
long freed = 0;
- bool cancel = true;
for (i = 0; i < drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &drc_hashtbl[i];
@@ -275,26 +262,11 @@ prune_cache_entries(void)
continue;
spin_lock(&b->cache_lock);
freed += prune_bucket(b);
- if (!list_empty(&b->lru_head))
- cancel = false;
spin_unlock(&b->cache_lock);
}
-
- /*
- * Conditionally rearm the job to run in RC_EXPIRE since we just
- * ran the pruner.
- */
- if (!cancel)
- mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
return freed;
}
-static void
-cache_cleaner_func(struct work_struct *unused)
-{
- prune_cache_entries();
-}
-
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 350041a40fe5..c1681ce894c5 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp)
fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
-#ifdef CONFIG_NFSD_V3
- fhp->fh_pre_saved = 0;
- fhp->fh_post_saved = 0;
-#endif
+ fh_clear_wcc(fhp);
}
fh_drop_write(fhp);
if (exp) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 1e90dad4926b..f84fe6bf9aee 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -7,6 +7,7 @@
#ifndef _LINUX_NFSD_NFSFH_H
#define _LINUX_NFSD_NFSFH_H
+#include <linux/crc32.h>
#include <linux/sunrpc/svc.h>
#include <uapi/linux/nfsd/nfsfh.h>
@@ -26,16 +27,16 @@ static inline ino_t u32_to_ino_t(__u32 uino)
*/
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
+ int fh_maxsize; /* max size for fh_handle */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
- int fh_maxsize; /* max size for fh_handle */
- unsigned char fh_locked; /* inode locked by us */
- unsigned char fh_want_write; /* remount protection taken */
+ bool fh_locked; /* inode locked by us */
+ bool fh_want_write; /* remount protection taken */
#ifdef CONFIG_NFSD_V3
- unsigned char fh_post_saved; /* post-op attrs saved */
- unsigned char fh_pre_saved; /* pre-op attrs saved */
+ bool fh_post_saved; /* post-op attrs saved */
+ bool fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
@@ -205,6 +206,28 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
return true;
}
+#ifdef CONFIG_CRC32
+/**
+ * knfsd_fh_hash - calculate the crc32 hash for the filehandle
+ * @fh - pointer to filehandle
+ *
+ * returns a crc32 hash for the filehandle that is compatible with
+ * the one displayed by "wireshark".
+ */
+
+static inline u32
+knfsd_fh_hash(struct knfsd_fh *fh)
+{
+ return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size);
+}
+#else
+static inline u32
+knfsd_fh_hash(struct knfsd_fh *fh)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_NFSD_V3
/*
* The wcc data stored in current_fh should be cleared
@@ -213,8 +236,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
static inline void
fh_clear_wcc(struct svc_fh *fhp)
{
- fhp->fh_post_saved = 0;
- fhp->fh_pre_saved = 0;
+ fhp->fh_post_saved = false;
+ fhp->fh_pre_saved = false;
}
/*
@@ -231,7 +254,7 @@ fill_pre_wcc(struct svc_fh *fhp)
fhp->fh_pre_ctime = inode->i_ctime;
fhp->fh_pre_size = inode->i_size;
fhp->fh_pre_change = inode->i_version;
- fhp->fh_pre_saved = 1;
+ fhp->fh_pre_saved = true;
}
}
@@ -265,9 +288,9 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
}
inode = d_inode(dentry);
- mutex_lock_nested(&inode->i_mutex, subclass);
+ inode_lock_nested(inode, subclass);
fill_pre_wcc(fhp);
- fhp->fh_locked = 1;
+ fhp->fh_locked = true;
}
static inline void
@@ -284,8 +307,8 @@ fh_unlock(struct svc_fh *fhp)
{
if (fhp->fh_locked) {
fill_post_wcc(fhp);
- mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
- fhp->fh_locked = 0;
+ inode_unlock(d_inode(fhp->fh_dentry));
+ fhp->fh_locked = false;
}
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ad4e2377dd63..45007acaf364 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -14,9 +14,13 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
#include <linux/seq_file.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
#include <net/net_namespace.h>
#include "nfsd.h"
#include "cache.h"
@@ -306,22 +310,81 @@ static void nfsd_shutdown_net(struct net *net)
nfsd_shutdown_generic();
}
+static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct net *net = dev_net(dev);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct sockaddr_in sin;
+
+ if (event != NETDEV_DOWN)
+ goto out;
+
+ if (nn->nfsd_serv) {
+ dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ifa->ifa_local;
+ svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
+ }
+
+out:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfsd_inetaddr_notifier = {
+ .notifier_call = nfsd_inetaddr_event,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int nfsd_inet6addr_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ struct net_device *dev = ifa->idev->dev;
+ struct net *net = dev_net(dev);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct sockaddr_in6 sin6;
+
+ if (event != NETDEV_DOWN)
+ goto out;
+
+ if (nn->nfsd_serv) {
+ dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ifa->addr;
+ svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
+ }
+
+out:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfsd_inet6addr_notifier = {
+ .notifier_call = nfsd_inet6addr_event,
+};
+#endif
+
static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
+#endif
/*
* write_ports can create the server without actually starting
* any threads--if we get shut down before any threads are
* started, then nfsd_last_thread will be run before any of this
- * other initialization has been done.
+ * other initialization has been done except the rpcb information.
*/
+ svc_rpcb_cleanup(serv, net);
if (!nn->nfsd_net_up)
return;
- nfsd_shutdown_net(net);
-
- svc_rpcb_cleanup(serv, net);
+ nfsd_shutdown_net(net);
printk(KERN_WARNING "nfsd: last server has exited, flushing export "
"cache\n");
nfsd_export_flush(net);
@@ -425,6 +488,10 @@ int nfsd_create_serv(struct net *net)
}
set_max_drc();
+ register_inetaddr_notifier(&nfsd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ register_inet6addr_notifier(&nfsd_inet6addr_notifier);
+#endif
do_gettimeofday(&nn->nfssvc_boot); /* record boot time */
return 0;
}
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index d4c4453674c6..7d073b9b1553 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -21,6 +21,7 @@ struct nfsd4_layout_ops {
u32 notify_types;
__be32 (*proc_getdeviceinfo)(struct super_block *sb,
+ struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdevp);
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
struct nfsd4_getdeviceinfo *gdevp);
@@ -32,10 +33,17 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp);
+
+ void (*fence_client)(struct nfs4_layout_stateid *ls);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
extern const struct nfsd4_layout_ops bl_layout_ops;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+extern const struct nfsd4_layout_ops scsi_layout_ops;
+#endif
__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 583ffc13cae2..c050c53036a6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -65,7 +65,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
u32 cb_minorversion;
struct rpc_message cb_msg;
- struct nfsd4_callback_ops *cb_ops;
+ const struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_seq_status;
int cb_status;
@@ -84,7 +84,7 @@ struct nfsd4_callback_ops {
* fields that are of general use to any stateid.
*/
struct nfs4_stid {
- atomic_t sc_count;
+ atomic_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
@@ -94,11 +94,12 @@ struct nfs4_stid {
#define NFS4_REVOKED_DELEG_STID 16
#define NFS4_CLOSED_DELEG_STID 32
#define NFS4_LAYOUT_STID 64
- unsigned char sc_type;
- stateid_t sc_stateid;
- struct nfs4_client *sc_client;
- struct nfs4_file *sc_file;
- void (*sc_free)(struct nfs4_stid *);
+ unsigned char sc_type;
+ stateid_t sc_stateid;
+ spinlock_t sc_lock;
+ struct nfs4_client *sc_client;
+ struct nfs4_file *sc_file;
+ void (*sc_free)(struct nfs4_stid *);
};
/*
@@ -364,15 +365,6 @@ struct nfs4_client_reclaim {
char cr_recdir[HEXDIR_LEN]; /* recover dir */
};
-static inline void
-update_stateid(stateid_t *stateid)
-{
- stateid->si_generation++;
- /* Wraparound recommendation from 3530bis-13 9.1.3.2: */
- if (stateid->si_generation == 0)
- stateid->si_generation = 1;
-}
-
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
* The OPEN response, typically the largest, requires
* 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
@@ -534,15 +526,16 @@ struct nfs4_file {
* Better suggestions welcome.
*/
struct nfs4_ol_stateid {
- struct nfs4_stid st_stid; /* must be first field */
- struct list_head st_perfile;
- struct list_head st_perstateowner;
- struct list_head st_locks;
- struct nfs4_stateowner * st_stateowner;
- struct nfs4_clnt_odstate * st_clnt_odstate;
- unsigned char st_access_bmap;
- unsigned char st_deny_bmap;
- struct nfs4_ol_stateid * st_openstp;
+ struct nfs4_stid st_stid;
+ struct list_head st_perfile;
+ struct list_head st_perstateowner;
+ struct list_head st_locks;
+ struct nfs4_stateowner *st_stateowner;
+ struct nfs4_clnt_odstate *st_clnt_odstate;
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
+ struct nfs4_ol_stateid *st_openstp;
+ struct rw_semaphore st_rwsem;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
@@ -561,6 +554,7 @@ struct nfs4_layout_stateid {
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
+ struct mutex ls_mutex;
};
static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
@@ -584,8 +578,8 @@ struct nfsd4_compound_state;
struct nfsd_net;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
- struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filp, bool *tmp_file);
+ struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+ stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
@@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
struct kmem_cache *slab);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
+void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
@@ -604,7 +599,7 @@ extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
- struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
+ const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern void nfsd4_run_cb(struct nfsd4_callback *cb);
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
index 82f89070594c..90967466a1e5 100644
--- a/fs/nfsd/trace.c
+++ b/fs/nfsd/trace.c
@@ -1,5 +1,3 @@
-#include "state.h"
-
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c668520c344b..3287041905da 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -8,6 +8,49 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include "nfsfh.h"
+
+DECLARE_EVENT_CLASS(nfsd_io_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *fhp,
+ loff_t offset,
+ int len),
+ TP_ARGS(rqstp, fhp, offset, len),
+ TP_STRUCT__entry(
+ __field(__be32, xid)
+ __field_struct(struct knfsd_fh, fh)
+ __field(loff_t, offset)
+ __field(int, len)
+ ),
+ TP_fast_assign(
+ __entry->xid = rqstp->rq_xid,
+ fh_copy_shallow(&__entry->fh, &fhp->fh_handle);
+ __entry->offset = offset;
+ __entry->len = len;
+ ),
+ TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d",
+ __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh),
+ __entry->offset, __entry->len)
+)
+
+#define DEFINE_NFSD_IO_EVENT(name) \
+DEFINE_EVENT(nfsd_io_class, name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *fhp, \
+ loff_t offset, \
+ int len), \
+ TP_ARGS(rqstp, fhp, offset, len))
+
+DEFINE_NFSD_IO_EVENT(read_start);
+DEFINE_NFSD_IO_EVENT(read_opened);
+DEFINE_NFSD_IO_EVENT(read_io_done);
+DEFINE_NFSD_IO_EVENT(read_done);
+DEFINE_NFSD_IO_EVENT(write_start);
+DEFINE_NFSD_IO_EVENT(write_opened);
+DEFINE_NFSD_IO_EVENT(write_io_done);
+DEFINE_NFSD_IO_EVENT(write_done);
+
+#include "state.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 45c04979e7b3..d40010e4f1a9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -36,12 +36,14 @@
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
+#include "../internal.h"
#include "acl.h"
#include "idmap.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
#include "vfs.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
@@ -217,10 +219,16 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
- /*
- * check if we have crossed a mount point ...
- */
if (nfsd_mountpoint(dentry, exp)) {
+ /*
+ * We don't need the i_mutex after all. It's
+ * still possible we could open this (regular
+ * files can be mountpoints too), but the
+ * i_mutex is just there to prevent renames of
+ * something that we might be about to delegate,
+ * and a mountpoint won't be renamed:
+ */
+ fh_unlock(fhp);
if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
dput(dentry);
goto out_nfserr;
@@ -485,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
host_error = security_inode_setsecctx(dentry, label->data, label->len);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return nfserrno(host_error);
}
#else
@@ -498,6 +506,13 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
+__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
+ u64 dst_pos, u64 count)
+{
+ return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
+ count));
+}
+
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
@@ -855,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
oldfs = get_fs();
set_fs(KERNEL_DS);
- host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+ host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0);
set_fs(oldfs);
return nfsd_finish_read(file, count, host_err);
}
@@ -942,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
- host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos);
+ host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0);
set_fs(oldfs);
if (host_err < 0)
goto out_nfserr;
@@ -983,16 +998,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct raparms *ra;
__be32 err;
+ trace_read_start(rqstp, fhp, offset, vlen);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
ra = nfsd_init_raparms(file);
+
+ trace_read_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
+ trace_read_io_done(rqstp, fhp, offset, vlen);
+
if (ra)
nfsd_put_raparams(file, ra);
fput(file);
+ trace_read_done(rqstp, fhp, offset, vlen);
+
return err;
}
@@ -1008,24 +1030,31 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
{
__be32 err = 0;
+ trace_write_start(rqstp, fhp, offset, vlen);
+
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
+ trace_write_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
stablep);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
} else {
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
+ trace_write_opened(rqstp, fhp, offset, vlen);
if (cnt)
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
cnt, stablep);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
fput(file);
}
out:
+ trace_write_done(rqstp, fhp, offset, vlen);
return err;
}
@@ -1631,7 +1660,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
/* cannot use fh_lock as we need deadlock protective ordering
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = 1;
+ ffhp->fh_locked = tfhp->fh_locked = true;
fill_pre_wcc(ffhp);
fill_pre_wcc(tfhp);
@@ -1681,7 +1710,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
fill_post_wcc(ffhp);
fill_post_wcc(tfhp);
unlock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = 0;
+ ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp);
out:
@@ -1809,7 +1838,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
offset = *offsetp;
while (1) {
- struct inode *dir_inode = file_inode(file);
unsigned int reclen;
cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1828,15 +1856,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
if (!size)
break;
- /*
- * Various filldir functions may end up calling back into
- * lookup_one_len() and the file system's ->lookup() method.
- * These expect i_mutex to be held, as it would within readdir.
- */
- host_err = mutex_lock_killable(&dir_inode->i_mutex);
- if (host_err)
- break;
-
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
@@ -1853,7 +1872,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
- mutex_unlock(&dir_inode->i_mutex);
if (size > 0) /* We bailed out early */
break;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fee2451ae248..2d573ec057f8 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -56,6 +56,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
+__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
+ u64, u64);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
@@ -112,14 +114,14 @@ static inline int fh_want_write(struct svc_fh *fh)
int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
if (!ret)
- fh->fh_want_write = 1;
+ fh->fh_want_write = true;
return ret;
}
static inline void fh_drop_write(struct svc_fh *fh)
{
if (fh->fh_want_write) {
- fh->fh_want_write = 0;
+ fh->fh_want_write = false;
mnt_drop_write(fh->fh_export->ex_path.mnt);
}
}
@@ -137,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
+static inline bool nfsd_eof_on_read(long requested, long read,
+ loff_t offset, loff_t size)
+{
+ /* We assume a short read means eof: */
+ if (requested > read)
+ return true;
+ /*
+ * A non-short read might also reach end of file. The spec
+ * still requires us to set eof in that case.
+ *
+ * Further operations may have modified the file size since
+ * the read, so the following check is not atomic with the read.
+ * We've only seen that cause a problem for a client in the case
+ * where the read returned a count of 0 without setting eof.
+ * That case was fixed by the addition of the above check.
+ */
+ return (offset + read >= size);
+}
+
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 9f991007a578..d9554813e58a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -491,6 +491,15 @@ struct nfsd4_fallocate {
u64 falloc_length;
};
+struct nfsd4_clone {
+ /* request */
+ stateid_t cl_src_stateid;
+ stateid_t cl_dst_stateid;
+ u64 cl_src_pos;
+ u64 cl_dst_pos;
+ u64 cl_count;
+};
+
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -555,6 +564,7 @@ struct nfsd4_op {
/* NFSv4.2 */
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
+ struct nfsd4_clone clone;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
@@ -632,7 +642,7 @@ static inline void
set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
{
BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = fhp->fh_post_saved;
+ cinfo->atomic = (u32)fhp->fh_post_saved;
cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
cinfo->before_change = fhp->fh_pre_change;