summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/nfs-rdma.txt103
-rw-r--r--fs/lockd/svc.c33
-rw-r--r--fs/lockd/svc4proc.c7
-rw-r--r--fs/lockd/svclock.c33
-rw-r--r--fs/lockd/svcproc.c7
-rw-r--r--fs/lockd/svcsubs.c32
-rw-r--r--fs/nfsd/lockd.c2
-rw-r--r--fs/nfsd/nfs2acl.c7
-rw-r--r--fs/nfsd/nfs3acl.c5
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4proc.c76
-rw-r--r--fs/nfsd/nfs4state.c49
-rw-r--r--fs/nfsd/nfs4xdr.c392
-rw-r--r--fs/nfsd/nfsctl.c118
-rw-r--r--fs/nfsd/nfsfh.c31
-rw-r--r--fs/nfsd/nfsproc.c9
-rw-r--r--fs/nfsd/nfssvc.c148
-rw-r--r--fs/nfsd/vfs.c140
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/lockd/lockd.h8
-rw-r--r--include/linux/nfs4.h3
-rw-r--r--include/linux/nfsd/nfsd.h27
-rw-r--r--include/linux/nfsd/state.h2
-rw-r--r--include/linux/sunrpc/gss_krb5.h3
-rw-r--r--include/linux/sunrpc/svc.h7
-rw-r--r--include/linux/sunrpc/svc_rdma.h36
-rw-r--r--net/sunrpc/auth_gss/Makefile4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c10
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c26
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c16
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c72
-rw-r--r--net/sunrpc/svc.c110
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c35
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c84
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c166
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c195
36 files changed, 1042 insertions, 966 deletions
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
index d0ec45ae4e7d..44bd766f2e5d 100644
--- a/Documentation/filesystems/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -5,7 +5,7 @@
################################################################################
Author: NetApp and Open Grid Computing
- Date: April 15, 2008
+ Date: May 29, 2008
Table of Contents
~~~~~~~~~~~~~~~~~
@@ -60,16 +60,18 @@ Installation
The procedures described in this document have been tested with
distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
- - Install nfs-utils-1.1.1 or greater on the client
+ - Install nfs-utils-1.1.2 or greater on the client
- An NFS/RDMA mount point can only be obtained by using the mount.nfs
- command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs
- you are using, type:
+ An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+ nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+ version with support for NFS/RDMA mounts, but for various reasons we
+ recommend using nfs-utils-1.1.2 or greater). To see which version of
+ mount.nfs you are using, type:
- > /sbin/mount.nfs -V
+ $ /sbin/mount.nfs -V
- If the version is less than 1.1.1 or the command does not exist,
- then you will need to install the latest version of nfs-utils.
+ If the version is less than 1.1.2 or the command does not exist,
+ you should install the latest version of nfs-utils.
Download the latest package from:
@@ -77,22 +79,33 @@ Installation
Uncompress the package and follow the installation instructions.
- If you will not be using GSS and NFSv4, the installation process
- can be simplified by disabling these features when running configure:
+ If you will not need the idmapper and gssd executables (you do not need
+ these to create an NFS/RDMA enabled mount command), the installation
+ process can be simplified by disabling these features when running
+ configure:
- > ./configure --disable-gss --disable-nfsv4
+ $ ./configure --disable-gss --disable-nfsv4
- For more information on this see the package's README and INSTALL files.
+ To build nfs-utils you will need the tcp_wrappers package installed. For
+ more information on this see the package's README and INSTALL files.
After building the nfs-utils package, there will be a mount.nfs binary in
the utils/mount directory. This binary can be used to initiate NFS v2, v3,
- or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4.
- The standard technique is to create a symlink called mount.nfs4 to mount.nfs.
+ or v4 mounts. To initiate a v4 mount, the binary must be called
+ mount.nfs4. The standard technique is to create a symlink called
+ mount.nfs4 to mount.nfs.
- NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed
+ This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+ $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+ In this location, mount.nfs will be invoked automatically for NFS mounts
+ by the system mount commmand.
+
+ NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
on the NFS client machine. You do not need this specific version of
nfs-utils on the server. Furthermore, only the mount.nfs command from
- nfs-utils-1.1.1 is needed on the client.
+ nfs-utils-1.1.2 is needed on the client.
- Install a Linux kernel with NFS/RDMA
@@ -156,8 +169,8 @@ Check RDMA and NFS Setup
this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
card:
- > modprobe ib_mthca
- > modprobe ib_ipoib
+ $ modprobe ib_mthca
+ $ modprobe ib_ipoib
If you are using InfiniBand, make sure there is a Subnet Manager (SM)
running on the network. If your IB switch has an embedded SM, you can
@@ -166,7 +179,7 @@ Check RDMA and NFS Setup
If an SM is running on your network, you should see the following:
- > cat /sys/class/infiniband/driverX/ports/1/state
+ $ cat /sys/class/infiniband/driverX/ports/1/state
4: ACTIVE
where driverX is mthca0, ipath5, ehca3, etc.
@@ -174,10 +187,10 @@ Check RDMA and NFS Setup
To further test the InfiniBand software stack, use IPoIB (this
assumes you have two IB hosts named host1 and host2):
- host1> ifconfig ib0 a.b.c.x
- host2> ifconfig ib0 a.b.c.y
- host1> ping a.b.c.y
- host2> ping a.b.c.x
+ host1$ ifconfig ib0 a.b.c.x
+ host2$ ifconfig ib0 a.b.c.y
+ host1$ ping a.b.c.y
+ host2$ ping a.b.c.x
For other device types, follow the appropriate procedures.
@@ -202,11 +215,11 @@ NFS/RDMA Setup
/vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
/vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
- The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the
- cleint's iWARP address(es) for an RNIC.
+ The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+ HCA or the cleint's iWARP address(es) for an RNIC.
- NOTE: The "insecure" option must be used because the NFS/RDMA client does not
- use a reserved port.
+ NOTE: The "insecure" option must be used because the NFS/RDMA client does
+ not use a reserved port.
Each time a machine boots:
@@ -214,43 +227,45 @@ NFS/RDMA Setup
For InfiniBand using a Mellanox adapter:
- > modprobe ib_mthca
- > modprobe ib_ipoib
- > ifconfig ib0 a.b.c.d
+ $ modprobe ib_mthca
+ $ modprobe ib_ipoib
+ $ ifconfig ib0 a.b.c.d
NOTE: use unique addresses for the client and server
- Start the NFS server
- If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
- load the RDMA transport module:
+ If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+ kernel config), load the RDMA transport module:
- > modprobe svcrdma
+ $ modprobe svcrdma
- Regardless of how the server was built (module or built-in), start the server:
+ Regardless of how the server was built (module or built-in), start the
+ server:
- > /etc/init.d/nfs start
+ $ /etc/init.d/nfs start
or
- > service nfs start
+ $ service nfs start
Instruct the server to listen on the RDMA transport:
- > echo rdma 2050 > /proc/fs/nfsd/portlist
+ $ echo rdma 2050 > /proc/fs/nfsd/portlist
- On the client system
- If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
- load the RDMA client module:
+ If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+ kernel config), load the RDMA client module:
- > modprobe xprtrdma.ko
+ $ modprobe xprtrdma.ko
- Regardless of how the client was built (module or built-in), issue the mount.nfs command:
+ Regardless of how the client was built (module or built-in), use this
+ command to mount the NFS/RDMA server:
- > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050
+ $ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt
- To verify that the mount is using RDMA, run "cat /proc/mounts" and check the
- "proto" field for the given mount.
+ To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+ the "proto" field for the given mount.
Congratulations! You're using NFS/RDMA!
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2169af4d5455..5bd9bf0fa9df 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
static struct task_struct *nlmsvc_task;
-static struct svc_serv *nlmsvc_serv;
+static struct svc_rqst *nlmsvc_rqst;
int nlmsvc_grace_period;
unsigned long nlmsvc_timeout;
@@ -194,20 +194,11 @@ lockd(void *vrqstp)
svc_process(rqstp);
}
-
flush_signals(current);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
-
unlock_kernel();
-
- nlmsvc_task = NULL;
- nlmsvc_serv = NULL;
-
- /* Exit the RPC thread */
- svc_exit_thread(rqstp);
-
return 0;
}
@@ -254,16 +245,15 @@ int
lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
{
struct svc_serv *serv;
- struct svc_rqst *rqstp;
int error = 0;
mutex_lock(&nlmsvc_mutex);
/*
* Check whether we're already up and running.
*/
- if (nlmsvc_serv) {
+ if (nlmsvc_rqst) {
if (proto)
- error = make_socks(nlmsvc_serv, proto);
+ error = make_socks(nlmsvc_rqst->rq_server, proto);
goto out;
}
@@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
/*
* Create the kernel thread and wait for it to start.
*/
- rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
+ nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
+ if (IS_ERR(nlmsvc_rqst)) {
+ error = PTR_ERR(nlmsvc_rqst);
+ nlmsvc_rqst = NULL;
printk(KERN_WARNING
"lockd_up: svc_rqst allocation failed, error=%d\n",
error);
@@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
}
svc_sock_update_bufs(serv);
- nlmsvc_serv = rqstp->rq_server;
- nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
+ nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task);
+ svc_exit_thread(nlmsvc_rqst);
nlmsvc_task = NULL;
- nlmsvc_serv = NULL;
+ nlmsvc_rqst = NULL;
printk(KERN_WARNING
"lockd_up: kthread_run failed, error=%d\n", error);
- svc_exit_thread(rqstp);
goto destroy_and_out;
}
@@ -346,6 +336,9 @@ lockd_down(void)
BUG();
}
kthread_stop(nlmsvc_task);
+ svc_exit_thread(nlmsvc_rqst);
+ nlmsvc_task = NULL;
+ nlmsvc_rqst = NULL;
out:
mutex_unlock(&nlmsvc_mutex);
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 2e27176ff42f..399444639337 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -58,8 +58,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
- if (host)
- nlm_release_host(host);
+ nlm_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
@@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now check for conflicting locks */
- resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
+ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
else
@@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
#endif
/* Now try to lock the file */
- resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
+ resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie);
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 56a08ab9a4cb..821b9acdfb66 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
{
- if(a->len != b->len)
+ if (a->len != b->len)
return 0;
- if(memcmp(a->data,b->data,a->len))
+ if (memcmp(a->data, b->data, a->len))
return 0;
return 1;
}
@@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
struct nlm_block *block;
struct nlm_rqst *call = NULL;
+ nlm_get_host(host);
call = nlm_alloc_call(host);
if (call == NULL)
return NULL;
@@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
*/
__be32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
- struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
+ struct nlm_host *host, struct nlm_lock *lock, int wait,
+ struct nlm_cookie *cookie)
{
struct nlm_block *block = NULL;
- struct nlm_host *host;
int error;
__be32 ret;
@@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
- /* Create host handle for callback */
- host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
- if (host == NULL)
- return nlm_lck_denied_nolocks;
-
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
@@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
block = nlmsvc_lookup_block(file, lock);
if (block == NULL) {
- block = nlmsvc_create_block(rqstp, nlm_get_host(host), file,
- lock, cookie);
+ block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
ret = nlm_lck_denied_nolocks;
if (block == NULL)
goto out;
@@ -417,7 +412,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
- switch(error) {
+ switch (error) {
case 0:
ret = nlm_granted;
goto out;
@@ -450,7 +445,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
out:
mutex_unlock(&file->f_mutex);
nlmsvc_release_block(block);
- nlm_release_host(host);
dprintk("lockd: nlmsvc_lock returned %u\n", ret);
return ret;
}
@@ -460,8 +454,8 @@ out:
*/
__be32
nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
- struct nlm_lock *lock, struct nlm_lock *conflock,
- struct nlm_cookie *cookie)
+ struct nlm_host *host, struct nlm_lock *lock,
+ struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
struct nlm_block *block = NULL;
int error;
@@ -479,16 +473,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
if (block == NULL) {
struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
- struct nlm_host *host;
if (conf == NULL)
return nlm_granted;
- /* Create host handle for callback */
- host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
- if (host == NULL) {
- kfree(conf);
- return nlm_lck_denied_nolocks;
- }
block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
if (block == NULL) {
kfree(conf);
@@ -897,7 +884,7 @@ nlmsvc_retry_blocked(void)
if (block->b_when == NLM_NEVER)
break;
- if (time_after(block->b_when,jiffies)) {
+ if (time_after(block->b_when, jiffies)) {
timeout = block->b_when - jiffies;
break;
}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ce6952b50a75..76019d2ff72d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -87,8 +87,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
- if (host)
- nlm_release_host(host);
+ nlm_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
@@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now check for conflicting locks */
- resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
+ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
else
@@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
#endif
/* Now try to lock the file */
- resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
+ resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie));
if (resp->status == nlm_drop_reply)
rc = rpc_drop_reply;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d1c48b539df8..198b4e55b373 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host)
}
}
-/*
- * Remove all locks held for clients
+/**
+ * nlmsvc_invalidate_all - remove all locks held for clients
+ *
+ * Release all locks held by NFS clients.
+ *
*/
void
nlmsvc_invalidate_all(void)
{
- /* Release all locks held by NFS clients.
+ /*
* Previously, the code would call
* nlmsvc_free_host_resources for each client in
* turn, which is about as inefficient as it gets.
@@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
return sb == file->f_file->f_path.mnt->mnt_sb;
}
+/**
+ * nlmsvc_unlock_all_by_sb - release locks held on this file system
+ * @sb: super block
+ *
+ * Release all locks held by clients accessing this file system.
+ */
int
nlmsvc_unlock_all_by_sb(struct super_block *sb)
{
@@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
static int
nlmsvc_match_ip(void *datap, struct nlm_host *host)
{
- __be32 *server_addr = datap;
-
- return host->h_saddr.sin_addr.s_addr == *server_addr;
+ return nlm_cmp_addr(&host->h_saddr, datap);
}
+/**
+ * nlmsvc_unlock_all_by_ip - release local locks by IP address
+ * @server_addr: server's IP address as seen by clients
+ *
+ * Release all locks held by clients accessing this host
+ * via the passed in IP address.
+ */
int
-nlmsvc_unlock_all_by_ip(__be32 server_addr)
+nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
{
int ret;
- ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
- return ret ? -EIO : 0;
+ ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL);
+ return ret ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 9e4a568a5013..6b6225ac4926 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
fh.fh_export = NULL;
exp_readlock();
- nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp);
+ nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
fh_put(&fh);
rqstp->rq_client = NULL;
exp_readunlock();
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1c3b7654e966..4e3219e84116 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
- if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (nfserr)
RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
if (!nfserr) {
nfserr = nfserrno( nfsd_set_posix_acl(
@@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+ return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
}
/*
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index b647f2f872dc..9981dbb377a3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
__be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
- if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (nfserr)
RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
__be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
if (!nfserr) {
nfserr = nfserrno( nfsd_set_posix_acl(
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index c721a1e6e9dd..4d617ea28cfc 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
@@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
attr = &argp->attrs;
/* Get the directory inode */
- nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
+ nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
if (nfserr)
RETURN_STATUS(nfserr);
@@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT;
- nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+ nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
/* Check special features of the file system. May request
* different read/write sizes for file systems known to have
@@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
resp->p_case_insensitive = 0;
resp->p_case_preserving = 1;
- nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+ nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
if (nfserr == 0) {
struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c309c881bd4e..eef1629806f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
return nfserr_inval;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
- accmode |= MAY_READ;
+ accmode |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
- accmode |= (MAY_WRITE | MAY_TRUNC);
+ accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
- accmode |= MAY_WRITE;
+ accmode |= NFSD_MAY_WRITE;
status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
@@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
if (!created)
- status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
+ status = do_open_permission(rqstp, current_fh, open,
+ NFSD_MAY_NOP);
out:
fh_put(&resfh);
@@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
(open->op_iattr.ia_size == 0);
- status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE);
+ status = do_open_permission(rqstp, current_fh, open,
+ NFSD_MAY_OWNER_OVERRIDE);
return status;
}
@@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
rp->rp_openfh_len);
- status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+ status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
dprintk("nfsd4_open: replay failed"
" restoring previous filehandle\n");
@@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
putfh->pf_fhlen);
- return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+ return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
}
static __be32
@@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fh_init(&resfh, NFS4_FHSIZE);
- status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE);
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
+ NFSD_MAY_CREATE);
if (status == nfserr_symlink)
status = nfserr_notdir;
if (status)
@@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
- status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+ status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
@@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int count;
__be32 status;
- status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP);
+ status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
@@ -843,10 +846,13 @@ struct nfsd4_operation {
#define ALLOWED_WITHOUT_FH 1
/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
#define ALLOWED_ON_ABSENT_FS 2
+ char *op_name;
};
static struct nfsd4_operation nfsd4_ops[];
+static inline char *nfsd4_op_name(unsigned opnum);
+
/*
* COMPOUND call.
*/
@@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
- dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
+ dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
+ resp->opcnt, args->opcnt, op->opnum,
+ nfsd4_op_name(op->opnum));
/*
* The XDR decode routines may have pre-set op->status;
@@ -952,126 +960,170 @@ encode_op:
out:
nfsd4_release_compoundargs(args);
cstate_free(cstate);
+ dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
+ .op_name = "OP_ACCESS",
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
+ .op_name = "OP_CLOSE",
},
[OP_COMMIT] = {
.op_func = (nfsd4op_func)nfsd4_commit,
+ .op_name = "OP_COMMIT",
},
[OP_CREATE] = {
.op_func = (nfsd4op_func)nfsd4_create,
+ .op_name = "OP_CREATE",
},
[OP_DELEGRETURN] = {
.op_func = (nfsd4op_func)nfsd4_delegreturn,
+ .op_name = "OP_DELEGRETURN",
},
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
.op_flags = ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_GETATTR",
},
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
+ .op_name = "OP_GETFH",
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
+ .op_name = "OP_LINK",
},
[OP_LOCK] = {
.op_func = (nfsd4op_func)nfsd4_lock,
+ .op_name = "OP_LOCK",
},
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
+ .op_name = "OP_LOCKT",
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
+ .op_name = "OP_LOCKU",
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
+ .op_name = "OP_LOOKUP",
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
+ .op_name = "OP_LOOKUPP",
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
+ .op_name = "OP_NVERIFY",
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
+ .op_name = "OP_OPEN",
},
[OP_OPEN_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_open_confirm,
+ .op_name = "OP_OPEN_CONFIRM",
},
[OP_OPEN_DOWNGRADE] = {
.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+ .op_name = "OP_OPEN_DOWNGRADE",
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_PUTFH",
},
[OP_PUTPUBFH] = {
- /* unsupported; just for future reference: */
+ /* unsupported, just for future reference: */
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_PUTPUBFH",
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_PUTROOTFH",
},
[OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read,
+ .op_name = "OP_READ",
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
+ .op_name = "OP_READDIR",
},
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
+ .op_name = "OP_READLINK",
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
+ .op_name = "OP_REMOVE",
},
[OP_RENAME] = {
+ .op_name = "OP_RENAME",
.op_func = (nfsd4op_func)nfsd4_rename,
},
[OP_RENEW] = {
.op_func = (nfsd4op_func)nfsd4_renew,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_RENEW",
},
[OP_RESTOREFH] = {
.op_func = (nfsd4op_func)nfsd4_restorefh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_RESTOREFH",
},
[OP_SAVEFH] = {
.op_func = (nfsd4op_func)nfsd4_savefh,
+ .op_name = "OP_SAVEFH",
},
[OP_SECINFO] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
+ .op_name = "OP_SECINFO",
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
+ .op_name = "OP_SETATTR",
},
[OP_SETCLIENTID] = {
.op_func = (nfsd4op_func)nfsd4_setclientid,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_SETCLIENTID",
},
[OP_SETCLIENTID_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_SETCLIENTID_CONFIRM",
},
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
+ .op_name = "OP_VERIFY",
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
+ .op_name = "OP_WRITE",
},
[OP_RELEASE_LOCKOWNER] = {
.op_func = (nfsd4op_func)nfsd4_release_lockowner,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_name = "OP_RELEASE_LOCKOWNER",
},
};
+static inline char *
+nfsd4_op_name(unsigned opnum)
+{
+ if (opnum < ARRAY_SIZE(nfsd4_ops))
+ return nfsd4_ops[opnum].op_name;
+ return "unknown_operation";
+}
+
#define nfs4svc_decode_voidargs NULL
#define nfs4svc_release_void NULL
#define nfsd4_voidres nfsd4_voidargs
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8799b8708188..1578d7a2667e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x)
return x <= NFS4_SHARE_DENY_BOTH;
}
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used. This allows us
+ * to enforce the recommendation of rfc 3530 14.2.19 that the server
+ * return an error if the client attempt to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ * OPEN allow read, deny write
+ * OPEN allow both, deny none
+ * DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
static void
set_access(unsigned int *access, unsigned long bmap) {
int i;
@@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
int err = get_write_access(inode);
if (err)
return nfserrno(err);
+ err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
+ if (err)
+ return nfserrno(err);
+ file_take_write(filp);
}
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status) {
@@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
}
/* remember the open */
filp->f_mode |= open->op_share_access;
- set_bit(open->op_share_access, &stp->st_access_bmap);
- set_bit(open->op_share_deny, &stp->st_deny_bmap);
+ __set_bit(open->op_share_access, &stp->st_access_bmap);
+ __set_bit(open->op_share_deny, &stp->st_deny_bmap);
return nfs_ok;
}
@@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
/* Stateid was not found, this is a new OPEN */
int flags = 0;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
- flags |= MAY_READ;
+ flags |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
- flags |= MAY_WRITE;
+ flags |= NFSD_MAY_WRITE;
status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
if (status)
goto out;
@@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
if ((status = fh_verify(rqstp, &cstate->current_fh,
- S_IFREG, MAY_LOCK))) {
+ S_IFREG, NFSD_MAY_LOCK))) {
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
@@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void)
nfs4_unlock_state();
}
+/*
+ * user_recovery_dirname is protected by the nfsd_mutex since it's only
+ * accessed when nfsd is starting.
+ */
static void
nfs4_set_recdir(char *recdir)
{
- nfs4_lock_state();
strcpy(user_recovery_dirname, recdir);
- nfs4_unlock_state();
}
/*
@@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir)
return status;
}
+char *
+nfs4_recoverydir(void)
+{
+ return user_recovery_dirname;
+}
+
/*
* Called when leasetime is changed.
*
@@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir)
* we start to register any changes in lease time. If the administrator
* really wants to change the lease time *now*, they can go ahead and bring
* nfsd down and then back up again after changing the lease time.
+ *
+ * user_lease_time is protected by nfsd_mutex since it's only really accessed
+ * when nfsd is starting
*/
void
nfs4_reset_lease(time_t leasetime)
{
- lock_kernel();
user_lease_time = leasetime;
- unlock_kernel();
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c513bbdf2d36..14ba4d9b2859 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -986,10 +986,74 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
}
static __be32
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+{
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
+{
+ return nfserr_opnotsupp;
+}
+
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
+
+static nfsd4_dec nfsd4_dec_ops[] = {
+ [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+ [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+ [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+ [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+ [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+ [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+ [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+ [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+ [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+ [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+ [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+ [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+ [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+ [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+ [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+ [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+ [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+ [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+ [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
+ [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+ [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+ [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+ [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+ [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+ [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+ [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
+};
+
+struct nfsd4_minorversion_ops {
+ nfsd4_dec *decoders;
+ int nops;
+};
+
+static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
+ [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
+};
+
+static __be32
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
DECODE_HEAD;
struct nfsd4_op *op;
+ struct nfsd4_minorversion_ops *ops;
int i;
/*
@@ -1019,6 +1083,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
}
}
+ if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
+ argp->opcnt = 0;
+
+ ops = &nfsd4_minorversion[argp->minorversion];
for (i = 0; i < argp->opcnt; i++) {
op = &argp->ops[i];
op->replay = NULL;
@@ -1056,120 +1124,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
}
op->opnum = ntohl(*argp->p++);
- switch (op->opnum) {
- case 2: /* Reserved operation */
- op->opnum = OP_ILLEGAL;
- if (argp->minorversion == 0)
- op->status = nfserr_op_illegal;
- else
- op->status = nfserr_minor_vers_mismatch;
- break;
- case OP_ACCESS:
- op->status = nfsd4_decode_access(argp, &op->u.access);
- break;
- case OP_CLOSE:
- op->status = nfsd4_decode_close(argp, &op->u.close);
- break;
- case OP_COMMIT:
- op->status = nfsd4_decode_commit(argp, &op->u.commit);
- break;
- case OP_CREATE:
- op->status = nfsd4_decode_create(argp, &op->u.create);
- break;
- case OP_DELEGRETURN:
- op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
- break;
- case OP_GETATTR:
- op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
- break;
- case OP_GETFH:
- op->status = nfs_ok;
- break;
- case OP_LINK:
- op->status = nfsd4_decode_link(argp, &op->u.link);
- break;
- case OP_LOCK:
- op->status = nfsd4_decode_lock(argp, &op->u.lock);
- break;
- case OP_LOCKT:
- op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
- break;
- case OP_LOCKU:
- op->status = nfsd4_decode_locku(argp, &op->u.locku);
- break;
- case OP_LOOKUP:
- op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
- break;
- case OP_LOOKUPP:
- op->status = nfs_ok;
- break;
- case OP_NVERIFY:
- op->status = nfsd4_decode_verify(argp, &op->u.nverify);
- break;
- case OP_OPEN:
- op->status = nfsd4_decode_open(argp, &op->u.open);
- break;
- case OP_OPEN_CONFIRM:
- op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
- break;
- case OP_OPEN_DOWNGRADE:
- op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
- break;
- case OP_PUTFH:
- op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
- break;
- case OP_PUTROOTFH:
- op->status = nfs_ok;
- break;
- case OP_READ:
- op->status = nfsd4_decode_read(argp, &op->u.read);
- break;
- case OP_READDIR:
- op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
- break;
- case OP_READLINK:
- op->status = nfs_ok;
- break;
- case OP_REMOVE:
- op->status = nfsd4_decode_remove(argp, &op->u.remove);
- break;
- case OP_RENAME:
- op->status = nfsd4_decode_rename(argp, &op->u.rename);
- break;
- case OP_RESTOREFH:
- op->status = nfs_ok;
- break;
- case OP_RENEW:
- op->status = nfsd4_decode_renew(argp, &op->u.renew);
- break;
- case OP_SAVEFH:
- op->status = nfs_ok;
- break;
- case OP_SECINFO:
- op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo);
- break;
- case OP_SETATTR:
- op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
- break;
- case OP_SETCLIENTID:
- op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
- break;
- case OP_SETCLIENTID_CONFIRM:
- op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
- break;
- case OP_VERIFY:
- op->status = nfsd4_decode_verify(argp, &op->u.verify);
- break;
- case OP_WRITE:
- op->status = nfsd4_decode_write(argp, &op->u.write);
- break;
- case OP_RELEASE_LOCKOWNER:
- op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
- break;
- default:
+ if (op->opnum >= OP_ACCESS && op->opnum < ops->nops)
+ op->status = ops->decoders[op->opnum](argp, &op->u);
+ else {
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
- break;
}
if (op->status) {
@@ -1201,11 +1160,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
*p++ = htonl((u32)((n) >> 32)); \
*p++ = htonl((u32)(n)); \
} while (0)
-#define WRITEMEM(ptr,nbytes) do { \
+#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \
*(p + XDR_QUADLEN(nbytes) -1) = 0; \
memcpy(p, ptr, nbytes); \
p += XDR_QUADLEN(nbytes); \
-} while (0)
+}} while (0)
#define WRITECINFO(c) do { \
*p++ = htonl(c.atomic); \
*p++ = htonl(c.before_ctime_sec); \
@@ -1991,7 +1950,7 @@ fail:
return -EINVAL;
}
-static void
+static __be32
nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
{
ENCODE_HEAD;
@@ -2002,9 +1961,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITE32(access->ac_resp_access);
ADJUST_ARGS();
}
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
{
ENCODE_SEQID_OP_HEAD;
@@ -2016,10 +1976,11 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
ADJUST_ARGS();
}
ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
{
ENCODE_HEAD;
@@ -2029,9 +1990,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITEMEM(commit->co_verf.data, 8);
ADJUST_ARGS();
}
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
{
ENCODE_HEAD;
@@ -2044,6 +2006,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITE32(create->cr_bmval[1]);
ADJUST_ARGS();
}
+ return nfserr;
}
static __be32
@@ -2064,9 +2027,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
return nfserr;
}
-static void
-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp)
+static __be32
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
{
+ struct svc_fh *fhp = *fhpp;
unsigned int len;
ENCODE_HEAD;
@@ -2077,6 +2041,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
WRITEMEM(&fhp->fh_handle.fh_base, len);
ADJUST_ARGS();
}
+ return nfserr;
}
/*
@@ -2104,7 +2069,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
ADJUST_ARGS();
}
-static void
+static __be32
nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
{
ENCODE_SEQID_OP_HEAD;
@@ -2118,16 +2083,18 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
nfsd4_encode_lock_denied(resp, &lock->lk_denied);
ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
{
if (nfserr == nfserr_denied)
nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
{
ENCODE_SEQID_OP_HEAD;
@@ -2140,10 +2107,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
}
ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
{
ENCODE_HEAD;
@@ -2153,10 +2121,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
WRITECINFO(link->li_cinfo);
ADJUST_ARGS();
}
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
{
ENCODE_SEQID_OP_HEAD;
@@ -2219,9 +2188,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
/* XXX save filehandle here */
out:
ENCODE_SEQID_OP_TAIL(open->op_stateowner);
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
{
ENCODE_SEQID_OP_HEAD;
@@ -2234,9 +2204,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
}
ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
{
ENCODE_SEQID_OP_HEAD;
@@ -2249,6 +2220,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
}
ENCODE_SEQID_OP_TAIL(od->od_stateowner);
+ return nfserr;
}
static __be32
@@ -2443,7 +2415,7 @@ err_no_verf:
return nfserr;
}
-static void
+static __be32
nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
{
ENCODE_HEAD;
@@ -2453,9 +2425,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITECINFO(remove->rm_cinfo);
ADJUST_ARGS();
}
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
{
ENCODE_HEAD;
@@ -2466,9 +2439,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
WRITECINFO(rename->rn_tinfo);
ADJUST_ARGS();
}
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo *secinfo)
{
@@ -2532,13 +2506,14 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
out:
if (exp)
exp_put(exp);
+ return nfserr;
}
/*
* The SETATTR encode routine is special -- it always encodes a bitmap,
* regardless of the error status.
*/
-static void
+static __be32
nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
{
ENCODE_HEAD;
@@ -2555,9 +2530,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
WRITE32(setattr->sa_bmval[1]);
}
ADJUST_ARGS();
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
{
ENCODE_HEAD;
@@ -2574,9 +2550,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
WRITE32(0);
ADJUST_ARGS();
}
+ return nfserr;
}
-static void
+static __be32
nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
{
ENCODE_HEAD;
@@ -2588,8 +2565,56 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
WRITEMEM(write->wr_verifier.data, 8);
ADJUST_ARGS();
}
+ return nfserr;
}
+static __be32
+nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+{
+ return nfserr;
+}
+
+typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+
+static nfsd4_enc nfsd4_enc_ops[] = {
+ [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
+ [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
+ [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
+ [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
+ [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
+ [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
+ [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
+ [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
+ [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
+ [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
+ [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+ [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
+ [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
+ [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
+ [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
+ [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
+ [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
+ [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
+ [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
+ [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
+ [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
+};
+
void
nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
{
@@ -2601,101 +2626,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
statp = p++; /* to be backfilled at the end */
ADJUST_ARGS();
- switch (op->opnum) {
- case OP_ACCESS:
- nfsd4_encode_access(resp, op->status, &op->u.access);
- break;
- case OP_CLOSE:
- nfsd4_encode_close(resp, op->status, &op->u.close);
- break;
- case OP_COMMIT:
- nfsd4_encode_commit(resp, op->status, &op->u.commit);
- break;
- case OP_CREATE:
- nfsd4_encode_create(resp, op->status, &op->u.create);
- break;
- case OP_DELEGRETURN:
- break;
- case OP_GETATTR:
- op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
- break;
- case OP_GETFH:
- nfsd4_encode_getfh(resp, op->status, op->u.getfh);
- break;
- case OP_LINK:
- nfsd4_encode_link(resp, op->status, &op->u.link);
- break;
- case OP_LOCK:
- nfsd4_encode_lock(resp, op->status, &op->u.lock);
- break;
- case OP_LOCKT:
- nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
- break;
- case OP_LOCKU:
- nfsd4_encode_locku(resp, op->status, &op->u.locku);
- break;
- case OP_LOOKUP:
- break;
- case OP_LOOKUPP:
- break;
- case OP_NVERIFY:
- break;
- case OP_OPEN:
- nfsd4_encode_open(resp, op->status, &op->u.open);
- break;
- case OP_OPEN_CONFIRM:
- nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
- break;
- case OP_OPEN_DOWNGRADE:
- nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
- break;
- case OP_PUTFH:
- break;
- case OP_PUTROOTFH:
- break;
- case OP_READ:
- op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
- break;
- case OP_READDIR:
- op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
- break;
- case OP_READLINK:
- op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
- break;
- case OP_REMOVE:
- nfsd4_encode_remove(resp, op->status, &op->u.remove);
- break;
- case OP_RENAME:
- nfsd4_encode_rename(resp, op->status, &op->u.rename);
- break;
- case OP_RENEW:
- break;
- case OP_RESTOREFH:
- break;
- case OP_SAVEFH:
- break;
- case OP_SECINFO:
- nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo);
- break;
- case OP_SETATTR:
- nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
- break;
- case OP_SETCLIENTID:
- nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
- break;
- case OP_SETCLIENTID_CONFIRM:
- break;
- case OP_VERIFY:
- break;
- case OP_WRITE:
- nfsd4_encode_write(resp, op->status, &op->u.write);
- break;
- case OP_RELEASE_LOCKOWNER:
- break;
- default:
- break;
- }
-
+ if (op->opnum == OP_ILLEGAL)
+ goto status;
+ BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
+ !nfsd4_enc_ops[op->opnum]);
+ op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
+status:
/*
* Note: We write the status directly, instead of using WRITE32(),
* since it is already in network byte order.
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5ac00c4fee91..1955a2702e60 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -310,9 +310,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
{
- __be32 server_ip;
- char *fo_path, c;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ };
int b1, b2, b3, b4;
+ char c;
+ char *fo_path;
/* sanity check */
if (size == 0)
@@ -326,11 +329,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
return -EINVAL;
/* get ipv4 address */
- if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
+ if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
return -EINVAL;
- server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
+ if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
+ return -EINVAL;
+ sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
- return nlmsvc_unlock_all_by_ip(server_ip);
+ return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
}
static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
@@ -450,22 +455,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
int i;
int rv;
int len;
- int npools = nfsd_nrpools();
+ int npools;
int *nthreads;
+ mutex_lock(&nfsd_mutex);
+ npools = nfsd_nrpools();
if (npools == 0) {
/*
* NFS is shut down. The admin can start it by
* writing to the threads file but NOT the pool_threads
* file, sorry. Report zero threads.
*/
+ mutex_unlock(&nfsd_mutex);
strcpy(buf, "0\n");
return strlen(buf);
}
nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+ rv = -ENOMEM;
if (nthreads == NULL)
- return -ENOMEM;
+ goto out_free;
if (size > 0) {
for (i = 0; i < npools; i++) {
@@ -496,14 +505,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
mesg += len;
}
+ mutex_unlock(&nfsd_mutex);
return (mesg-buf);
out_free:
kfree(nthreads);
+ mutex_unlock(&nfsd_mutex);
return rv;
}
-static ssize_t write_versions(struct file *file, char *buf, size_t size)
+static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
/*
* Format:
@@ -566,14 +577,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}
-static ssize_t write_ports(struct file *file, char *buf, size_t size)
+static ssize_t write_versions(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_versions(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
{
if (size == 0) {
int len = 0;
- lock_kernel();
+
if (nfsd_serv)
len = svc_xprt_names(nfsd_serv, buf, 0);
- unlock_kernel();
return len;
}
/* Either a single 'fd' number is written, in which
@@ -603,9 +623,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
/* Decrease the count, but don't shutdown the
* the service
*/
- lock_kernel();
nfsd_serv->sv_nrthreads--;
- unlock_kernel();
}
return err < 0 ? err : 0;
}
@@ -614,10 +632,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
if (!toclose)
return -ENOMEM;
- lock_kernel();
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
- unlock_kernel();
if (len >= 0)
lockd_down();
kfree(toclose);
@@ -655,7 +671,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
- lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
@@ -666,13 +681,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
} else
err = -ENOTCONN;
}
- unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL;
}
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_ports(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+
int nfsd_max_blksize;
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +716,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
if (bsize > NFSSVC_MAXBLKSIZE)
bsize = NFSSVC_MAXBLKSIZE;
bsize &= ~(1024-1);
- lock_kernel();
+ mutex_lock(&nfsd_mutex);
if (nfsd_serv && nfsd_serv->sv_nrthreads) {
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
return -EBUSY;
}
nfsd_max_blksize = bsize;
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
}
return sprintf(buf, "%d\n", nfsd_max_blksize);
}
@@ -705,16 +730,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
-static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
{
/* if size > 10 seconds, call
* nfs4_reset_lease() then write out the new lease (seconds) as reply
*/
char *mesg = buf;
- int rv;
+ int rv, lease;
if (size > 0) {
- int lease;
+ if (nfsd_serv)
+ return -EBUSY;
rv = get_int(&mesg, &lease);
if (rv)
return rv;
@@ -726,24 +752,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
return strlen(buf);
}
-static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_leasetime(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+extern char *nfs4_recoverydir(void);
+
+static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
char *recdir;
int len, status;
- if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
- return -EINVAL;
- buf[size-1] = 0;
+ if (size > 0) {
+ if (nfsd_serv)
+ return -EBUSY;
+ if (size > PATH_MAX || buf[size-1] != '\n')
+ return -EINVAL;
+ buf[size-1] = 0;
- recdir = mesg;
- len = qword_get(&mesg, recdir, size);
- if (len <= 0)
- return -EINVAL;
+ recdir = mesg;
+ len = qword_get(&mesg, recdir, size);
+ if (len <= 0)
+ return -EINVAL;
- status = nfs4_reset_recoverydir(recdir);
+ status = nfs4_reset_recoverydir(recdir);
+ }
+ sprintf(buf, "%s\n", nfs4_recoverydir());
return strlen(buf);
}
+
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_recoverydir(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
#endif
/*----------------------------------------------------------------------------*/
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 100ae5641162..f45451eb1e38 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
- error = nfsd_setuser_and_check_port(rqstp, exp);
- if (error)
- goto out;
+ if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
+ /* Elevate privileges so that the lack of 'r' or 'x'
+ * permission on some parent directory will
+ * not stop exportfs_decode_fh from being able
+ * to reconnect a directory into the dentry cache.
+ * The same problem can affect "SUBTREECHECK" exports,
+ * but as nfsd_acceptable depends on correct
+ * access control settings being in effect, we cannot
+ * fix that case easily.
+ */
+ current->cap_effective =
+ cap_raise_nfsd_set(current->cap_effective,
+ current->cap_permitted);
+ } else {
+ error = nfsd_setuser_and_check_port(rqstp, exp);
+ if (error)
+ goto out;
+ }
/*
* Look up the dentry using the NFS file handle.
@@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
goto out;
}
+ if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
+ error = nfsd_setuser_and_check_port(rqstp, exp);
+ if (error) {
+ dput(dentry);
+ goto out;
+ }
+ }
+
if (S_ISDIR(dentry->d_inode->i_mode) &&
(dentry->d_flags & DCACHE_DISCONNECTED)) {
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
if (error)
goto out;
- if (!(access & MAY_LOCK)) {
+ if (!(access & NFSD_MAY_LOCK)) {
/*
* pseudoflavor restrictions are not enforced on NLM,
* which clients virtually always use auth_sys for,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6cfc96a12483..0766f95d236a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+ nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
return nfsd_return_attrs(nfserr, resp);
}
@@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
SVCFH_fmt(dirfhp), argp->len, argp->name);
/* First verify the parent file handle */
- nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
+ nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
if (nfserr)
goto done; /* must fh_put dirfhp even on error */
- /* Check for MAY_WRITE in nfsd_create if necessary */
+ /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
nfserr = nfserr_acces;
if (!argp->len)
@@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfsd_permission(rqstp,
newfhp->fh_export,
newfhp->fh_dentry,
- MAY_WRITE|MAY_LOCAL_ACCESS);
+ NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
if (nfserr && nfserr != nfserr_rofs)
goto out_unlock;
}
@@ -614,6 +614,7 @@ nfserrno (int errno)
#endif
{ nfserr_stale, -ESTALE },
{ nfserr_jukebox, -ETIMEDOUT },
+ { nfserr_jukebox, -ERESTARTSYS },
{ nfserr_dropit, -EAGAIN },
{ nfserr_dropit, -ENOMEM },
{ nfserr_badname, -ESRCH },
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 941041f4b136..80292ff5e924 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
#include <linux/smp_lock.h>
#include <linux/freezer.h>
#include <linux/fs_struct.h>
+#include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
@@ -36,28 +37,38 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
-/* these signals will be delivered to an nfsd thread
- * when handling a request
- */
-#define ALLOWED_SIGS (sigmask(SIGKILL))
-/* these signals will be delivered to an nfsd thread
- * when not handling a request. i.e. when waiting
- */
-#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
-/* if the last thread dies with SIGHUP, then the exports table is
- * left unchanged ( like 2.4-{0-9} ). Any other signal will clear
- * the exports table (like 2.2).
- */
-#define SIG_NOCLEAN SIGHUP
-
extern struct svc_program nfsd_program;
-static void nfsd(struct svc_rqst *rqstp);
+static int nfsd(void *vrqstp);
struct timeval nfssvc_boot;
- struct svc_serv *nfsd_serv;
static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
static DEFINE_SPINLOCK(nfsd_call_lock);
+/*
+ * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
+ * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+ * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ *
+ * If (out side the lock) nfsd_serv is non-NULL, then it must point to a
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
+ * of nfsd threads must exist and each must listed in ->sp_all_threads in each
+ * entry of ->sv_pools[].
+ *
+ * Transitions of the thread count between zero and non-zero are of particular
+ * interest since the svc_serv needs to be created and initialized at that
+ * point, or freed.
+ *
+ * Finally, the nfsd_mutex also protects some of the global variables that are
+ * accessed when nfsd starts and that are settable via the write_* routines in
+ * nfsctl.c. In particular:
+ *
+ * user_recovery_dirname
+ * user_lease_time
+ * nfsd_versions
+ */
+DEFINE_MUTEX(nfsd_mutex);
+struct svc_serv *nfsd_serv;
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
@@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change)
int nfsd_nrthreads(void)
{
- if (nfsd_serv == NULL)
- return 0;
- else
- return nfsd_serv->sv_nrthreads;
+ int rv = 0;
+ mutex_lock(&nfsd_mutex);
+ if (nfsd_serv)
+ rv = nfsd_serv->sv_nrthreads;
+ mutex_unlock(&nfsd_mutex);
+ return rv;
}
-static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
@@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
nfsd_racache_shutdown();
nfs4_state_shutdown();
- printk(KERN_WARNING "nfsd: last server has exited\n");
- if (killsig != SIG_NOCLEAN) {
- printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
- nfsd_export_flush();
- }
+ printk(KERN_WARNING "nfsd: last server has exited, flushing export "
+ "cache\n");
+ nfsd_export_flush();
}
void nfsd_reset_versions(void)
@@ -190,13 +200,14 @@ void nfsd_reset_versions(void)
}
}
+
int nfsd_create_serv(void)
{
int err = 0;
- lock_kernel();
+
+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
svc_get(nfsd_serv);
- unlock_kernel();
return 0;
}
if (nfsd_max_blksize == 0) {
@@ -217,13 +228,11 @@ int nfsd_create_serv(void)
}
atomic_set(&nfsd_busy, 0);
- nfsd_serv = svc_create_pooled(&nfsd_program,
- nfsd_max_blksize,
- nfsd_last_thread,
- nfsd, SIG_NOCLEAN, THIS_MODULE);
+ nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+ nfsd_last_thread, nfsd, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
- unlock_kernel();
+
do_gettimeofday(&nfssvc_boot); /* record boot time */
return err;
}
@@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
int tot = 0;
int err = 0;
+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
+
if (nfsd_serv == NULL || n <= 0)
return 0;
@@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
nthreads[0] = 1;
/* apply the new numbers */
- lock_kernel();
svc_get(nfsd_serv);
for (i = 0; i < n; i++) {
err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
break;
}
svc_destroy(nfsd_serv);
- unlock_kernel();
return err;
}
@@ -334,8 +343,8 @@ int
nfsd_svc(unsigned short port, int nrservs)
{
int error;
-
- lock_kernel();
+
+ mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
@@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs)
failure:
svc_destroy(nfsd_serv); /* Release server */
out:
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
return error;
}
@@ -391,18 +400,17 @@ update_thread_usage(int busy_threads)
/*
* This is the NFS server kernel thread
*/
-static void
-nfsd(struct svc_rqst *rqstp)
+static int
+nfsd(void *vrqstp)
{
+ struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct fs_struct *fsp;
- int err;
- sigset_t shutdown_mask, allowed_mask;
+ int err, preverr = 0;
/* Lock module and set up kernel thread */
- lock_kernel();
- daemonize("nfsd");
+ mutex_lock(&nfsd_mutex);
- /* After daemonize() this kernel thread shares current->fs
+ /* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
fsp = copy_fs_struct(current->fs);
@@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp)
current->fs = fsp;
current->fs->umask = 0;
- siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
- siginitsetinv(&allowed_mask, ALLOWED_SIGS);
+ /*
+ * thread is spawned with all signals set to SIG_IGN, re-enable
+ * the ones that will bring down the thread
+ */
+ allow_signal(SIGKILL);
+ allow_signal(SIGHUP);
+ allow_signal(SIGINT);
+ allow_signal(SIGQUIT);
nfsdstats.th_cnt++;
-
- rqstp->rq_task = current;
-
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
/*
* We want less throttling in balance_dirty_pages() so that nfs to
@@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp)
* The main request loop
*/
for (;;) {
- /* Block all but the shutdown signals */
- sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
-
/*
* Find a socket with data available and call its
* recvfrom routine.
*/
while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
- if (err < 0)
+ if (err == -EINTR)
break;
+ else if (err < 0) {
+ if (err != preverr) {
+ printk(KERN_WARNING "%s: unexpected error "
+ "from svc_recv (%d)\n", __func__, -err);
+ preverr = err;
+ }
+ schedule_timeout_uninterruptible(HZ);
+ continue;
+ }
+
update_thread_usage(atomic_read(&nfsd_busy));
atomic_inc(&nfsd_busy);
/* Lock the export hash tables for reading. */
exp_readlock();
- /* Process request with signals blocked. */
- sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
-
svc_process(rqstp);
/* Unlock export hash tables */
@@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp)
atomic_dec(&nfsd_busy);
}
- if (err != -EINTR) {
- printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
- } else {
- unsigned int signo;
-
- for (signo = 1; signo <= _NSIG; signo++)
- if (sigismember(&current->pending.signal, signo) &&
- !sigismember(&current->blocked, signo))
- break;
- killsig = signo;
- }
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);
- lock_kernel();
-
+ mutex_lock(&nfsd_mutex);
nfsdstats.th_cnt --;
out:
@@ -486,8 +489,9 @@ out:
svc_exit_thread(rqstp);
/* Release module */
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
+ return 0;
}
static __be32 map_new_errors(u32 vers, __be32 nfserr)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a3a291f771f4..0f4481e0502d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -144,7 +144,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
/* Obtain dentry and export. */
- err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
+ err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
@@ -262,14 +262,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
{
struct dentry *dentry;
struct inode *inode;
- int accmode = MAY_SATTR;
+ int accmode = NFSD_MAY_SATTR;
int ftype = 0;
__be32 err;
int host_err;
int size_change = 0;
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
- accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
@@ -331,7 +331,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*/
if (iap->ia_valid & ATTR_SIZE) {
if (iap->ia_size < inode->i_size) {
- err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
+ err = nfsd_permission(rqstp, fhp->fh_export, dentry,
+ NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
}
@@ -462,7 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int flags = 0;
/* Get inode */
- error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
+ error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
if (error)
return error;
@@ -563,20 +564,20 @@ struct accessmap {
int how;
};
static struct accessmap nfs3_regaccess[] = {
- { NFS3_ACCESS_READ, MAY_READ },
- { NFS3_ACCESS_EXECUTE, MAY_EXEC },
- { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC },
- { NFS3_ACCESS_EXTEND, MAY_WRITE },
+ { NFS3_ACCESS_READ, NFSD_MAY_READ },
+ { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
+ { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
{ 0, 0 }
};
static struct accessmap nfs3_diraccess[] = {
- { NFS3_ACCESS_READ, MAY_READ },
- { NFS3_ACCESS_LOOKUP, MAY_EXEC },
- { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC },
- { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE },
- { NFS3_ACCESS_DELETE, MAY_REMOVE },
+ { NFS3_ACCESS_READ, NFSD_MAY_READ },
+ { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
+ { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
+ { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
{ 0, 0 }
};
@@ -589,10 +590,10 @@ static struct accessmap nfs3_anyaccess[] = {
* mainly at mode bits, and we make sure to ignore read-only
* filesystem checks
*/
- { NFS3_ACCESS_READ, MAY_READ },
- { NFS3_ACCESS_EXECUTE, MAY_EXEC },
- { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS },
- { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS },
+ { NFS3_ACCESS_READ, NFSD_MAY_READ },
+ { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
+ { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
{ 0, 0 }
};
@@ -606,7 +607,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
u32 query, result = 0, sresult = 0;
__be32 error;
- error = fh_verify(rqstp, fhp, 0, MAY_NOP);
+ error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (error)
goto out;
@@ -678,7 +679,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
* in case a chmod has now revoked permission.
*/
- err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
+ err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
@@ -689,7 +690,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
* or any access when mandatory locking enabled
*/
err = nfserr_perm;
- if (IS_APPEND(inode) && (access & MAY_WRITE))
+ if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
goto out;
/*
* We must ignore files (but only files) which might have mandatory
@@ -706,14 +707,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
* Check to see if there are any leases on this file.
* This may block while leases are broken.
*/
- host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
+ host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
if (host_err == -EWOULDBLOCK)
host_err = -ETIMEDOUT;
if (host_err) /* NOMEM or WOULDBLOCK */
goto out_nfserr;
- if (access & MAY_WRITE) {
- if (access & MAY_READ)
+ if (access & NFSD_MAY_WRITE) {
+ if (access & NFSD_MAY_READ)
flags = O_RDWR|O_LARGEFILE;
else
flags = O_WRONLY|O_LARGEFILE;
@@ -1069,12 +1070,12 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- MAY_READ|MAY_OWNER_OVERRIDE);
+ NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
} else {
- err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
@@ -1098,13 +1099,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- MAY_WRITE|MAY_OWNER_OVERRIDE);
+ NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
stablep);
} else {
- err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
@@ -1136,7 +1137,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
if ((u64)count > ~(u64)offset)
return nfserr_inval;
- if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ if (err)
return err;
if (EX_ISSYNC(fhp->fh_export)) {
if (file->f_op && file->f_op->fsync) {
@@ -1197,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (isdotent(fname, flen))
goto out;
- err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+ err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1248,36 +1250,34 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+ err = nfserr_inval;
+ if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
+ printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
+ type);
+ goto out;
+ }
+
+ host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ if (host_err)
+ goto out_nfserr;
+
/*
* Get the dir op function pointer.
*/
err = 0;
switch (type) {
case S_IFREG:
- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
- if (host_err)
- goto out_nfserr;
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
break;
case S_IFDIR:
- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
- if (host_err)
- goto out_nfserr;
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
- if (host_err)
- goto out_nfserr;
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break;
- default:
- printk("nfsd: bad file type %o in nfsd_create\n", type);
- host_err = -EINVAL;
- goto out_nfserr;
}
if (host_err < 0) {
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1289,7 +1289,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
write_inode_now(dchild->d_inode, 1);
}
-
err2 = nfsd_create_setattr(rqstp, resfhp, iap);
if (err2)
err = err2;
@@ -1334,7 +1333,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
- err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+ err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1471,7 +1470,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
__be32 err;
int host_err;
- err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
+ err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
if (err)
goto out;
@@ -1526,7 +1525,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (isdotent(fname, flen))
goto out;
- err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+ err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
fh_lock(fhp);
@@ -1591,10 +1590,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
__be32 err;
int host_err;
- err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
+ err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
- err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
+ err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
if (err)
goto out;
@@ -1661,10 +1660,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
__be32 err;
int host_err;
- err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
+ err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
goto out;
- err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
+ err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1768,7 +1767,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
err = nfserr_acces;
if (!flen || isdotent(fname, flen))
goto out;
- err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
+ err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
goto out;
@@ -1834,7 +1833,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
struct file *file;
loff_t offset = *offsetp;
- err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
+ err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
if (err)
goto out;
@@ -1875,7 +1874,7 @@ out:
__be32
nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
{
- __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP);
+ __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (!err && vfs_statfs(fhp->fh_dentry,stat))
err = nfserr_io;
return err;
@@ -1896,18 +1895,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
struct inode *inode = dentry->d_inode;
int err;
- if (acc == MAY_NOP)
+ if (acc == NFSD_MAY_NOP)
return 0;
#if 0
dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
acc,
- (acc & MAY_READ)? " read" : "",
- (acc & MAY_WRITE)? " write" : "",
- (acc & MAY_EXEC)? " exec" : "",
- (acc & MAY_SATTR)? " sattr" : "",
- (acc & MAY_TRUNC)? " trunc" : "",
- (acc & MAY_LOCK)? " lock" : "",
- (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
+ (acc & NFSD_MAY_READ)? " read" : "",
+ (acc & NFSD_MAY_WRITE)? " write" : "",
+ (acc & NFSD_MAY_EXEC)? " exec" : "",
+ (acc & NFSD_MAY_SATTR)? " sattr" : "",
+ (acc & NFSD_MAY_TRUNC)? " trunc" : "",
+ (acc & NFSD_MAY_LOCK)? " lock" : "",
+ (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
IS_APPEND(inode)? " append" : "",
@@ -1920,18 +1919,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
* system. But if it is IRIX doing check on write-access for a
* device special file, we ignore rofs.
*/
- if (!(acc & MAY_LOCAL_ACCESS))
- if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
+ if (!(acc & NFSD_MAY_LOCAL_ACCESS))
+ if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
if (exp_rdonly(rqstp, exp) ||
__mnt_is_readonly(exp->ex_path.mnt))
return nfserr_rofs;
- if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
+ if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
return nfserr_perm;
}
- if ((acc & MAY_TRUNC) && IS_APPEND(inode))
+ if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
- if (acc & MAY_LOCK) {
+ if (acc & NFSD_MAY_LOCK) {
/* If we cannot rely on authentication in NLM requests,
* just allow locks, otherwise require read permission, or
* ownership
@@ -1939,7 +1938,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
if (exp->ex_flags & NFSEXP_NOAUTHNLM)
return 0;
else
- acc = MAY_READ | MAY_OWNER_OVERRIDE;
+ acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
}
/*
* The file owner always gets access permission for accesses that
@@ -1955,15 +1954,16 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
* We must trust the client to do permission checking - using "ACCESS"
* with NFSv3.
*/
- if ((acc & MAY_OWNER_OVERRIDE) &&
+ if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
inode->i_uid == current->fsuid)
return 0;
+ /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
- acc == (MAY_READ | MAY_OWNER_OVERRIDE))
+ acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
err = permission(inode, MAY_EXEC, NULL);
return err? nfserrno(err) : 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c6455dadb21b..9c2ac5c0ef5c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -918,12 +918,12 @@ struct file_lock {
struct list_head fl_link; /* doubly linked list of all locks */
struct list_head fl_block; /* circular list of blocked processes */
fl_owner_t fl_owner;
+ unsigned char fl_flags;
+ unsigned char fl_type;
unsigned int fl_pid;
struct pid *fl_nspid;
wait_queue_head_t fl_wait;
struct file *fl_file;
- unsigned char fl_flags;
- unsigned char fl_type;
loff_t fl_start;
loff_t fl_end;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 102d928f7206..dbb87ab282e8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -200,10 +200,12 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
* Server-side lock handling
*/
__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
- struct nlm_lock *, int, struct nlm_cookie *);
+ struct nlm_host *, struct nlm_lock *, int,
+ struct nlm_cookie *);
__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
- struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *);
+ struct nlm_host *, struct nlm_lock *,
+ struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
unsigned long nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
@@ -224,7 +226,7 @@ void nlmsvc_invalidate_all(void);
* Cluster failover support
*/
int nlmsvc_unlock_all_by_sb(struct super_block *sb);
-int nlmsvc_unlock_all_by_ip(__be32 server_addr);
+int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8726491de154..ea0366769484 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -65,9 +65,6 @@
#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
-#define NFS4_ACE_OWNER 0x00000080
-#define NFS4_ACE_GROUP 0x00000100
-#define NFS4_ACE_EVERYONE 0x00000200
#define NFS4_ACE_READ_DATA 0x00000001
#define NFS4_ACE_LIST_DIRECTORY 0x00000001
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 41d30c9c9de6..a2861d95ecc3 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -28,20 +28,20 @@
#define NFSD_SUPPORTED_MINOR_VERSION 0
/*
- * Special flags for nfsd_permission. These must be different from MAY_READ,
- * MAY_WRITE, and MAY_EXEC.
+ * Flags for nfsd_permission
*/
-#define MAY_NOP 0
-#define MAY_SATTR 8
-#define MAY_TRUNC 16
-#define MAY_LOCK 32
-#define MAY_OWNER_OVERRIDE 64
-#define MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
-#if (MAY_SATTR | MAY_TRUNC | MAY_LOCK | MAY_OWNER_OVERRIDE | MAY_LOCAL_ACCESS) & (MAY_READ | MAY_WRITE | MAY_EXEC)
-# error "please use a different value for MAY_SATTR or MAY_TRUNC or MAY_LOCK or MAY_LOCAL_ACCESS or MAY_OWNER_OVERRIDE."
-#endif
-#define MAY_CREATE (MAY_EXEC|MAY_WRITE)
-#define MAY_REMOVE (MAY_EXEC|MAY_WRITE|MAY_TRUNC)
+#define NFSD_MAY_NOP 0
+#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
+#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
+#define NFSD_MAY_READ 4 /* == MAY_READ */
+#define NFSD_MAY_SATTR 8
+#define NFSD_MAY_TRUNC 16
+#define NFSD_MAY_LOCK 32
+#define NFSD_MAY_OWNER_OVERRIDE 64
+#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
+
+#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
+#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
/*
* Callback function for readdir
@@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
extern struct svc_program nfsd_program;
extern struct svc_version nfsd_version2, nfsd_version3,
nfsd_version4;
+extern struct mutex nfsd_mutex;
extern struct svc_serv *nfsd_serv;
extern struct seq_operations nfs_exports_op;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index db348f749376..d0fe2e378452 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -98,8 +98,6 @@ struct nfs4_callback {
u32 cb_ident;
/* RPC client info */
atomic_t cb_set; /* successful CB_NULL call */
- struct rpc_program cb_program;
- struct rpc_stat cb_stat;
struct rpc_clnt * cb_client;
};
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index a10f1fb0bf7c..e7bbdba474d5 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -51,6 +51,9 @@ struct krb5_ctx {
extern spinlock_t krb5_seq_lock;
+/* The length of the Kerberos GSS token header */
+#define GSS_KRB5_TOK_HDR_LEN (16)
+
#define KG_TOK_MIC_MSG 0x0101
#define KG_TOK_WRAP_MSG 0x0201
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4b54c5fdcfd9..dc69068d94c7 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -22,7 +22,7 @@
/*
* This is the RPC server thread function prototype
*/
-typedef void (*svc_thread_fn)(struct svc_rqst *);
+typedef int (*svc_thread_fn)(void *);
/*
*
@@ -80,7 +80,6 @@ struct svc_serv {
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
- int sv_kill_signal; /* signal to kill threads */
};
/*
@@ -388,8 +387,8 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
- void (*shutdown)(struct svc_serv*),
- svc_thread_fn, int sig, struct module *);
+ void (*shutdown)(struct svc_serv*), svc_thread_fn,
+ struct module *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
void svc_destroy(struct svc_serv *);
int svc_process(struct svc_rqst *);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 05eb4664d0dd..ef2e3a20bf3b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -72,7 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
*/
struct svc_rdma_op_ctxt {
struct svc_rdma_op_ctxt *read_hdr;
- struct list_head free_list;
+ int hdr_count;
struct xdr_buf arg;
struct list_head dto_q;
enum ib_wr_opcode wr_op;
@@ -86,6 +86,31 @@ struct svc_rdma_op_ctxt {
struct page *pages[RPCSVC_MAXPAGES];
};
+/*
+ * NFS_ requests are mapped on the client side by the chunk lists in
+ * the RPCRDMA header. During the fetching of the RPC from the client
+ * and the writing of the reply to the client, the memory in the
+ * client and the memory in the server must be mapped as contiguous
+ * vaddr/len for access by the hardware. These data strucures keep
+ * these mappings.
+ *
+ * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
+ * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
+ * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
+ * mapping of the reply.
+ */
+struct svc_rdma_chunk_sge {
+ int start; /* sge no for this chunk */
+ int count; /* sge count for this chunk */
+};
+struct svc_rdma_req_map {
+ unsigned long count;
+ union {
+ struct kvec sge[RPCSVC_MAXPAGES];
+ struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+ };
+};
+
#define RDMACTXT_F_LAST_CTXT 2
struct svcxprt_rdma {
@@ -93,7 +118,6 @@ struct svcxprt_rdma {
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
- wait_queue_head_t sc_read_wait;
int sc_max_sge;
int sc_sq_depth; /* Depth of SQ */
@@ -104,12 +128,8 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
+ atomic_t sc_dma_used;
atomic_t sc_ctxt_used;
- struct list_head sc_ctxt_free;
- int sc_ctxt_cnt;
- int sc_ctxt_bump;
- int sc_ctxt_max;
- spinlock_t sc_ctxt_lock;
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
@@ -173,6 +193,8 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
+extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index f3431a7e33da..4de8bcf26fa7 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,12 +5,12 @@
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
- gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o
+ gss_mech_switch.o svcauth_gss.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
- gss_krb5_seqnum.o gss_krb5_wrap.o
+ gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 1d52308ca324..c93fca204558 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -83,8 +83,6 @@ out:
return ret;
}
-EXPORT_SYMBOL(krb5_encrypt);
-
u32
krb5_decrypt(
struct crypto_blkcipher *tfm,
@@ -118,8 +116,6 @@ out:
return ret;
}
-EXPORT_SYMBOL(krb5_decrypt);
-
static int
checksummer(struct scatterlist *sg, void *data)
{
@@ -161,8 +157,6 @@ out:
return err ? GSS_S_FAILURE : 0;
}
-EXPORT_SYMBOL(make_checksum);
-
struct encryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
struct blkcipher_desc desc;
@@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
return ret;
}
-EXPORT_SYMBOL(gss_encrypt_xdr_buf);
-
struct decryptor_desc {
u8 iv[8]; /* XXX hard-coded blocksize */
struct blkcipher_desc desc;
@@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
}
-
-EXPORT_SYMBOL(gss_decrypt_xdr_buf);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 5f1d36dfbcf7..b8f42ef7178e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
- unsigned char *ptr, *krb5_hdr, *msg_start;
+ unsigned char *ptr, *msg_start;
s32 now;
u32 seq_send;
@@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
now = get_seconds();
- token->len = g_token_size(&ctx->mech_used, 24);
+ token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
ptr = token->data;
- g_make_token_header(&ctx->mech_used, 24, &ptr);
+ g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
- *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
- *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
+ /* ptr now at header described in rfc 1964, section 1.2.1: */
+ ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
+ ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
- /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
- krb5_hdr = ptr - 2;
- msg_start = krb5_hdr + 24;
+ msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
- *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
- memset(krb5_hdr + 4, 0xff, 4);
+ *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+ memset(ptr + 4, 0xff, 4);
- if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
+ if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
- memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
+ memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send++;
spin_unlock(&krb5_seq_lock);
if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
- seq_send, krb5_hdr + 16, krb5_hdr + 8))
+ seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
+ ptr + 8))
return GSS_S_FAILURE;
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index d91a5d004803..066ec73c84d6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
read_token->len))
return GSS_S_DEFECTIVE_TOKEN;
- if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
- (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
+ if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
+ (ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
- signalg = ptr[0] + (ptr[1] << 8);
+ signalg = ptr[2] + (ptr[3] << 8);
if (signalg != SGN_ALG_DES_MAC_MD5)
return GSS_S_DEFECTIVE_TOKEN;
- sealalg = ptr[2] + (ptr[3] << 8);
+ sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_NONE)
return GSS_S_DEFECTIVE_TOKEN;
- if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+ if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
- if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
+ if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
return GSS_S_FAILURE;
- if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+ if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
@@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
/* do sequencing checks */
- if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
+ if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
return GSS_S_FAILURE;
if ((ctx->initiate && direction != 0xff) ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index b00b1b426301..ae8e69b59c4c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -87,8 +87,8 @@ out:
return 0;
}
-static inline void
-make_confounder(char *p, int blocksize)
+static void
+make_confounder(char *p, u32 conflen)
{
static u64 i = 0;
u64 *q = (u64 *)p;
@@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize)
* uniqueness would mean worrying about atomicity and rollover, and I
* don't care enough. */
- BUG_ON(blocksize != 8);
- *q = i++;
+ /* initialize to random value */
+ if (i == 0) {
+ i = random32();
+ i = (i << 32) | random32();
+ }
+
+ switch (conflen) {
+ case 16:
+ *q++ = i++;
+ /* fall through */
+ case 8:
+ *q++ = i++;
+ break;
+ default:
+ BUG();
+ }
}
/* Assumptions: the head and tail of inbuf are ours to play with.
@@ -122,7 +136,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
char cksumdata[16];
struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
int blocksize = 0, plainlen;
- unsigned char *ptr, *krb5_hdr, *msg_start;
+ unsigned char *ptr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
@@ -149,26 +163,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
buf->len += headlen;
BUG_ON((buf->len - offset - headlen) % blocksize);
- g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr);
+ g_make_token_header(&kctx->mech_used,
+ GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
- *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
- *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
+ /* ptr now at header described in rfc 1964, section 1.2.1: */
+ ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
+ ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
- /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
- krb5_hdr = ptr - 2;
- msg_start = krb5_hdr + 24;
+ msg_start = ptr + 24;
- *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
- memset(krb5_hdr + 4, 0xff, 4);
- *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
+ *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+ memset(ptr + 4, 0xff, 4);
+ *(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
make_confounder(msg_start, blocksize);
/* XXXJBF: UGH!: */
tmp_pages = buf->pages;
buf->pages = pages;
- if (make_checksum("md5", krb5_hdr, 8, buf,
+ if (make_checksum("md5", ptr, 8, buf,
offset + headlen - blocksize, &md5cksum))
return GSS_S_FAILURE;
buf->pages = tmp_pages;
@@ -176,7 +190,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
- memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
+ memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
spin_lock(&krb5_seq_lock);
seq_send = kctx->seq_send++;
@@ -185,7 +199,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
- seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+ seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
return GSS_S_FAILURE;
if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
@@ -219,38 +233,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
buf->len - offset))
return GSS_S_DEFECTIVE_TOKEN;
- if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
- (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
+ if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
+ (ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
/* get the sign and seal algorithms */
- signalg = ptr[0] + (ptr[1] << 8);
+ signalg = ptr[2] + (ptr[3] << 8);
if (signalg != SGN_ALG_DES_MAC_MD5)
return GSS_S_DEFECTIVE_TOKEN;
- sealalg = ptr[2] + (ptr[3] << 8);
+ sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_DES)
return GSS_S_DEFECTIVE_TOKEN;
- if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+ if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (gss_decrypt_xdr_buf(kctx->enc, buf,
- ptr + 22 - (unsigned char *)buf->head[0].iov_base))
+ ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
return GSS_S_DEFECTIVE_TOKEN;
- if (make_checksum("md5", ptr - 2, 8, buf,
- ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+ if (make_checksum("md5", ptr, 8, buf,
+ ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
return GSS_S_FAILURE;
if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
md5cksum.data, md5cksum.len))
return GSS_S_FAILURE;
- if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+ if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
@@ -262,8 +276,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
/* do sequencing checks */
- if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
- &seqnum))
+ if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+ &direction, &seqnum))
return GSS_S_BAD_SIG;
if ((kctx->initiate && direction != 0xff) ||
@@ -274,7 +288,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
* better to copy and encrypt at the same time. */
blocksize = crypto_blkcipher_blocksize(kctx->enc);
- data_start = ptr + 22 + blocksize;
+ data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
orig_start = buf->head[0].iov_base + offset;
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 01c7e311b904..5a32cb7c4bb4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
@@ -291,15 +292,14 @@ svc_pool_map_put(void)
/*
- * Set the current thread's cpus_allowed mask so that it
+ * Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
- *
- * Returns 1 and fills in oldmask iff a cpumask was applied.
*/
-static inline int
-svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+static inline void
+svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node = m->pool_to[pidx];
/*
* The caller checks for sv_nrpools > 1, which
@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
*/
BUG_ON(m->count == 0);
- switch (m->mode)
- {
- default:
- return 0;
+ switch (m->mode) {
case SVC_POOL_PERCPU:
{
- unsigned int cpu = m->pool_to[pidx];
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- return 1;
+ set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
+ break;
}
case SVC_POOL_PERNODE:
{
- unsigned int node = m->pool_to[pidx];
node_to_cpumask_ptr(nodecpumask, node);
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, nodecpumask);
- return 1;
+ set_cpus_allowed_ptr(task, nodecpumask);
+ break;
}
}
}
@@ -443,7 +434,7 @@ EXPORT_SYMBOL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void (*shutdown)(struct svc_serv *serv),
- svc_thread_fn func, int sig, struct module *mod)
+ svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
@@ -452,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
if (serv != NULL) {
serv->sv_function = func;
- serv->sv_kill_signal = sig;
serv->sv_module = mod;
}
@@ -461,7 +451,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
EXPORT_SYMBOL(svc_create_pooled);
/*
- * Destroy an RPC service. Should be called with the BKL held
+ * Destroy an RPC service. Should be called with appropriate locking to
+ * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
*/
void
svc_destroy(struct svc_serv *serv)
@@ -578,46 +569,6 @@ out_enomem:
EXPORT_SYMBOL(svc_prepare_thread);
/*
- * Create a thread in the given pool. Caller must hold BKL.
- * On a NUMA or SMP machine, with a multi-pool serv, the thread
- * will be restricted to run on the cpus belonging to the pool.
- */
-static int
-__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
- struct svc_pool *pool)
-{
- struct svc_rqst *rqstp;
- int error = -ENOMEM;
- int have_oldmask = 0;
- cpumask_t uninitialized_var(oldmask);
-
- rqstp = svc_prepare_thread(serv, pool);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
- goto out;
- }
-
- if (serv->sv_nrpools > 1)
- have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
-
- error = kernel_thread((int (*)(void *)) func, rqstp, 0);
-
- if (have_oldmask)
- set_cpus_allowed(current, oldmask);
-
- if (error < 0)
- goto out_thread;
- svc_sock_update_bufs(serv);
- error = 0;
-out:
- return error;
-
-out_thread:
- svc_exit_thread(rqstp);
- goto out;
-}
-
-/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
*/
static inline struct svc_pool *
@@ -674,7 +625,7 @@ found_pool:
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Must be called with a svc_get() reference and
- * the BKL held.
+ * the BKL or another lock to protect access to svc_serv fields.
*
* Destroying threads relies on the service threads filling in
* rqstp->rq_task, which only the nfs ones do. Assumes the serv
@@ -686,7 +637,9 @@ found_pool:
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct task_struct *victim;
+ struct svc_rqst *rqstp;
+ struct task_struct *task;
+ struct svc_pool *chosen_pool;
int error = 0;
unsigned int state = serv->sv_nrthreads-1;
@@ -702,18 +655,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
/* create new threads */
while (nrservs > 0) {
nrservs--;
+ chosen_pool = choose_pool(serv, pool, &state);
+
+ rqstp = svc_prepare_thread(serv, chosen_pool);
+ if (IS_ERR(rqstp)) {
+ error = PTR_ERR(rqstp);
+ break;
+ }
+
__module_get(serv->sv_module);
- error = __svc_create_thread(serv->sv_function, serv,
- choose_pool(serv, pool, &state));
- if (error < 0) {
+ task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
module_put(serv->sv_module);
+ svc_exit_thread(rqstp);
break;
}
+
+ rqstp->rq_task = task;
+ if (serv->sv_nrpools > 1)
+ svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
+
+ svc_sock_update_bufs(serv);
+ wake_up_process(task);
}
/* destroy old threads */
while (nrservs < 0 &&
- (victim = choose_victim(serv, pool, &state)) != NULL) {
- send_sig(serv->sv_kill_signal, victim, 1);
+ (task = choose_victim(serv, pool, &state)) != NULL) {
+ send_sig(SIGINT, task, 1);
nrservs++;
}
@@ -722,7 +691,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
EXPORT_SYMBOL(svc_set_num_threads);
/*
- * Called from a server thread as it's exiting. Caller must hold BKL.
+ * Called from a server thread as it's exiting. Caller must hold the BKL or
+ * the "service mutex", whichever is appropriate for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 88c0ca20bb1e..87101177825b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
+/* Temporary NFS request map and context caches */
+struct kmem_cache *svc_rdma_map_cachep;
+struct kmem_cache *svc_rdma_ctxt_cachep;
+
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
@@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = {
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
+ flush_scheduled_work();
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
+ kmem_cache_destroy(svc_rdma_map_cachep);
+ kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
@@ -255,9 +262,37 @@ int svc_rdma_init(void)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
+ /* Create the temporary map cache */
+ svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
+ sizeof(struct svc_rdma_req_map),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!svc_rdma_map_cachep) {
+ printk(KERN_INFO "Could not allocate map cache.\n");
+ goto err0;
+ }
+
+ /* Create the temporary context cache */
+ svc_rdma_ctxt_cachep =
+ kmem_cache_create("svc_rdma_ctxt_cache",
+ sizeof(struct svc_rdma_op_ctxt),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!svc_rdma_ctxt_cachep) {
+ printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
+ goto err1;
+ }
+
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
+ err1:
+ kmem_cache_destroy(svc_rdma_map_cachep);
+ err0:
+ unregister_sysctl_table(svcrdma_table_header);
+ return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 06ab4841537b..b4b17f44cb29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0;
}
-struct chunk_sge {
- int start; /* sge no for this chunk */
- int count; /* sge count for this chunk */
-};
-
/* Encode a read-chunk-list as an array of IB SGE
*
* Assumptions:
@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
struct rpcrdma_msg *rmsgp,
- struct ib_sge *sge,
- struct chunk_sge *ch_sge_ary,
+ struct svc_rdma_req_map *rpl_map,
+ struct svc_rdma_req_map *chl_map,
int ch_count,
int byte_count)
{
@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
- head->sge[0].length = head->count; /* save count of hdr pages */
+ head->hdr_count = head->count; /* save count of hdr pages */
head->arg.page_base = 0;
head->arg.page_len = ch_bytes;
head->arg.len = rqstp->rq_arg.len + ch_bytes;
head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
head->count++;
- ch_sge_ary[0].start = 0;
+ chl_map->ch[0].start = 0;
while (byte_count) {
+ rpl_map->sge[sge_no].iov_base =
+ page_address(rqstp->rq_arg.pages[page_no]) + page_off;
sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
- sge[sge_no].addr =
- ib_dma_map_page(xprt->sc_cm_id->device,
- rqstp->rq_arg.pages[page_no],
- page_off, sge_bytes,
- DMA_FROM_DEVICE);
- sge[sge_no].length = sge_bytes;
- sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+ rpl_map->sge[sge_no].iov_len = sge_bytes;
/*
* Don't bump head->count here because the same page
* may be used by multiple SGE.
@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
* SGE, move to the next SGE
*/
if (ch_bytes == 0) {
- ch_sge_ary[ch_no].count =
- sge_no - ch_sge_ary[ch_no].start;
+ chl_map->ch[ch_no].count =
+ sge_no - chl_map->ch[ch_no].start;
ch_no++;
ch++;
- ch_sge_ary[ch_no].start = sge_no;
+ chl_map->ch[ch_no].start = sge_no;
ch_bytes = ch->rc_target.rs_length;
/* If bytes remaining account for next chunk */
if (byte_count) {
@@ -220,18 +211,25 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
return sge_no;
}
-static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
- struct ib_sge *sge,
+static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+ struct svc_rdma_op_ctxt *ctxt,
+ struct kvec *vec,
u64 *sgl_offset,
int count)
{
int i;
ctxt->count = count;
+ ctxt->direction = DMA_FROM_DEVICE;
for (i = 0; i < count; i++) {
- ctxt->sge[i].addr = sge[i].addr;
- ctxt->sge[i].length = sge[i].length;
- *sgl_offset = *sgl_offset + sge[i].length;
+ atomic_inc(&xprt->sc_dma_used);
+ ctxt->sge[i].addr =
+ ib_dma_map_single(xprt->sc_cm_id->device,
+ vec[i].iov_base, vec[i].iov_len,
+ DMA_FROM_DEVICE);
+ ctxt->sge[i].length = vec[i].iov_len;
+ ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
+ *sgl_offset = *sgl_offset + vec[i].iov_len;
}
}
@@ -282,34 +280,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
struct ib_send_wr read_wr;
int err = 0;
int ch_no;
- struct ib_sge *sge;
int ch_count;
int byte_count;
int sge_count;
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
- struct svc_rdma_op_ctxt *tmp_sge_ctxt;
- struct svc_rdma_op_ctxt *tmp_ch_ctxt;
- struct chunk_sge *ch_sge_ary;
+ struct svc_rdma_req_map *rpl_map;
+ struct svc_rdma_req_map *chl_map;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
- /* Allocate temporary contexts to keep SGE */
- BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
- tmp_sge_ctxt = svc_rdma_get_context(xprt);
- sge = tmp_sge_ctxt->sge;
- tmp_ch_ctxt = svc_rdma_get_context(xprt);
- ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
+ /* Allocate temporary reply and chunk maps */
+ rpl_map = svc_rdma_get_req_map();
+ chl_map = svc_rdma_get_req_map();
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
- sge, ch_sge_ary,
+ rpl_map, chl_map,
ch_count, byte_count);
sgl_offset = 0;
ch_no = 0;
@@ -331,14 +324,15 @@ next_sge:
read_wr.wr.rdma.remote_addr =
get_unaligned(&(ch->rc_target.rs_offset)) +
sgl_offset;
- read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
+ read_wr.sg_list = ctxt->sge;
read_wr.num_sge =
- rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
- rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
+ rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
+ rdma_set_ctxt_sge(xprt, ctxt,
+ &rpl_map->sge[chl_map->ch[ch_no].start],
&sgl_offset,
read_wr.num_sge);
if (((ch+1)->rc_discrim == 0) &&
- (read_wr.num_sge == ch_sge_ary[ch_no].count)) {
+ (read_wr.num_sge == chl_map->ch[ch_no].count)) {
/*
* Mark the last RDMA_READ with a bit to
* indicate all RPC data has been fetched from
@@ -358,9 +352,9 @@ next_sge:
}
atomic_inc(&rdma_stat_read);
- if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
- ch_sge_ary[ch_no].count -= read_wr.num_sge;
- ch_sge_ary[ch_no].start += read_wr.num_sge;
+ if (read_wr.num_sge < chl_map->ch[ch_no].count) {
+ chl_map->ch[ch_no].count -= read_wr.num_sge;
+ chl_map->ch[ch_no].start += read_wr.num_sge;
goto next_sge;
}
sgl_offset = 0;
@@ -368,8 +362,8 @@ next_sge:
}
out:
- svc_rdma_put_context(tmp_sge_ctxt, 0);
- svc_rdma_put_context(tmp_ch_ctxt, 0);
+ svc_rdma_put_req_map(rpl_map);
+ svc_rdma_put_req_map(chl_map);
/* Detach arg pages. svc_recv will replenish them */
for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
@@ -399,7 +393,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
- rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
+ rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index fb82b1b683f8..a19b22b452a3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -63,52 +63,44 @@
* SGE[2..sge_count-2] data from xdr->pages[]
* SGE[sge_count-1] data from xdr->tail.
*
+ * The max SGE we need is the length of the XDR / pagesize + one for
+ * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
+ * reserves a page for both the request and the reply header, and this
+ * array is only concerned with the reply we are assured that we have
+ * on extra page for the RPCRMDA header.
*/
-static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct ib_sge *sge,
- int *sge_count)
+static void xdr_to_sge(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
- /* Max we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header
- */
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
- u32 byte_count = xdr->len;
u32 sge_bytes;
u32 page_bytes;
- int page_off;
+ u32 page_off;
int page_no;
+ BUG_ON(xdr->len !=
+ (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
+
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
/* Head SGE */
- sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
- xdr->head[0].iov_base,
- xdr->head[0].iov_len,
- DMA_TO_DEVICE);
- sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
- byte_count -= sge_bytes;
- sge[sge_no].length = sge_bytes;
- sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+ vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+ vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
sge_no++;
/* pages SGE */
page_no = 0;
page_bytes = xdr->page_len;
page_off = xdr->page_base;
- while (byte_count && page_bytes) {
- sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
- sge[sge_no].addr =
- ib_dma_map_page(xprt->sc_cm_id->device,
- xdr->pages[page_no], page_off,
- sge_bytes, DMA_TO_DEVICE);
- sge_bytes = min(sge_bytes, page_bytes);
- byte_count -= sge_bytes;
+ while (page_bytes) {
+ vec->sge[sge_no].iov_base =
+ page_address(xdr->pages[page_no]) + page_off;
+ sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
page_bytes -= sge_bytes;
- sge[sge_no].length = sge_bytes;
- sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+ vec->sge[sge_no].iov_len = sge_bytes;
sge_no++;
page_no++;
@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
}
/* Tail SGE */
- if (byte_count && xdr->tail[0].iov_len) {
- sge[sge_no].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- xdr->tail[0].iov_base,
- xdr->tail[0].iov_len,
- DMA_TO_DEVICE);
- sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
- byte_count -= sge_bytes;
- sge[sge_no].length = sge_bytes;
- sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+ if (xdr->tail[0].iov_len) {
+ vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+ vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
sge_no++;
}
BUG_ON(sge_no > sge_max);
- BUG_ON(byte_count != 0);
-
- *sge_count = sge_no;
- return sge;
+ vec->count = sge_no;
}
-
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 rmr, u64 to,
u32 xdr_off, int write_len,
- struct ib_sge *xdr_sge, int sge_count)
+ struct svc_rdma_req_map *vec)
{
- struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct ib_send_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
int sge_off;
int bc;
struct svc_rdma_op_ctxt *ctxt;
- int ret = 0;
- BUG_ON(sge_count > RPCSVC_MAXPAGES);
+ BUG_ON(vec->count > RPCSVC_MAXPAGES);
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
- "write_len=%d, xdr_sge=%p, sge_count=%d\n",
+ "write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off,
- write_len, xdr_sge, sge_count);
+ write_len, vec->sge, vec->count);
ctxt = svc_rdma_get_context(xprt);
- ctxt->count = 0;
- tmp_sge_ctxt = svc_rdma_get_context(xprt);
- sge = tmp_sge_ctxt->sge;
+ ctxt->direction = DMA_TO_DEVICE;
+ sge = ctxt->sge;
/* Find the SGE associated with xdr_off */
- for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count;
+ for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
xdr_sge_no++) {
- if (xdr_sge[xdr_sge_no].length > bc)
+ if (vec->sge[xdr_sge_no].iov_len > bc)
break;
- bc -= xdr_sge[xdr_sge_no].length;
+ bc -= vec->sge[xdr_sge_no].iov_len;
}
sge_off = bc;
@@ -180,21 +158,28 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_no = 0;
/* Copy the remaining SGE */
- while (bc != 0 && xdr_sge_no < sge_count) {
- sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off;
- sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
+ while (bc != 0 && xdr_sge_no < vec->count) {
+ sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
sge_bytes = min((size_t)bc,
- (size_t)(xdr_sge[xdr_sge_no].length-sge_off));
+ (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
sge[sge_no].length = sge_bytes;
-
+ atomic_inc(&xprt->sc_dma_used);
+ sge[sge_no].addr =
+ ib_dma_map_single(xprt->sc_cm_id->device,
+ (void *)
+ vec->sge[xdr_sge_no].iov_base + sge_off,
+ sge_bytes, DMA_TO_DEVICE);
+ if (dma_mapping_error(sge[sge_no].addr))
+ goto err;
sge_off = 0;
sge_no++;
+ ctxt->count++;
xdr_sge_no++;
bc -= sge_bytes;
}
BUG_ON(bc != 0);
- BUG_ON(xdr_sge_no > sge_count);
+ BUG_ON(xdr_sge_no > vec->count);
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
@@ -209,21 +194,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
/* Post It */
atomic_inc(&rdma_stat_write);
- if (svc_rdma_send(xprt, &write_wr)) {
- svc_rdma_put_context(ctxt, 1);
- /* Fatal error, close transport */
- ret = -EIO;
- }
- svc_rdma_put_context(tmp_sge_ctxt, 0);
- return ret;
+ if (svc_rdma_send(xprt, &write_wr))
+ goto err;
+ return 0;
+ err:
+ svc_rdma_put_context(ctxt, 0);
+ /* Fatal error, close transport */
+ return -EIO;
}
static int send_write_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
- struct ib_sge *sge,
- int sge_count)
+ struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
@@ -269,8 +253,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
rs_offset + chunk_off,
xdr_off,
this_write,
- sge,
- sge_count);
+ vec);
if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
@@ -292,8 +275,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
- struct ib_sge *sge,
- int sge_count)
+ struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
@@ -341,8 +323,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
rs_offset + chunk_off,
xdr_off,
this_write,
- sge,
- sge_count);
+ vec);
if (ret) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
@@ -380,7 +361,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_op_ctxt *ctxt,
- int sge_count,
+ struct svc_rdma_req_map *vec,
int byte_count)
{
struct ib_send_wr send_wr;
@@ -405,6 +386,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
+ atomic_inc(&rdma->sc_dma_used);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device,
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
@@ -413,10 +395,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
/* Determine how many of our SGE are to be transmitted */
- for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
- sge_bytes = min((size_t)ctxt->sge[sge_no].length,
- (size_t)byte_count);
+ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
+ sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
+ atomic_inc(&rdma->sc_dma_used);
+ ctxt->sge[sge_no].addr =
+ ib_dma_map_single(rdma->sc_cm_id->device,
+ vec->sge[sge_no].iov_base,
+ sge_bytes, DMA_TO_DEVICE);
+ ctxt->sge[sge_no].length = sge_bytes;
+ ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
}
BUG_ON(byte_count != 0);
@@ -428,8 +416,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
+ /* If there are more pages than SGE, terminate SGE list */
+ if (page_no+1 >= sge_no)
+ ctxt->sge[page_no+1].length = 0;
}
-
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
@@ -473,20 +463,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
- struct ib_sge *sge;
- int sge_count = 0;
struct page *res_page;
struct svc_rdma_op_ctxt *ctxt;
+ struct svc_rdma_req_map *vec;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
/* Get the RDMA request header. */
rdma_argp = xdr_start(&rqstp->rq_arg);
- /* Build an SGE for the XDR */
+ /* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
- sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);
+ vec = svc_rdma_get_req_map();
+ xdr_to_sge(rdma, &rqstp->rq_res, vec);
inline_bytes = rqstp->rq_res.len;
@@ -503,7 +493,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Send any write-chunk data and build resp write-list */
ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
- rqstp, sge, sge_count);
+ rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret);
@@ -513,7 +503,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Send any reply-list data and update resp reply-list */
ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
- rqstp, sge, sge_count);
+ rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret);
@@ -521,11 +511,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
}
inline_bytes -= ret;
- ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count,
+ ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
+ svc_rdma_put_req_map(vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
error:
+ svc_rdma_put_req_map(vec);
svc_rdma_put_context(ctxt, 0);
put_page(res_page);
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e132509d1db0..19ddc382b777 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,70 +84,37 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};
-static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
+/* WR context cache. Created in svc_rdma.c */
+extern struct kmem_cache *svc_rdma_ctxt_cachep;
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
- int target;
- int at_least_one = 0;
struct svc_rdma_op_ctxt *ctxt;
- target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
- xprt->sc_ctxt_max);
-
- spin_lock_bh(&xprt->sc_ctxt_lock);
- while (xprt->sc_ctxt_cnt < target) {
- xprt->sc_ctxt_cnt++;
- spin_unlock_bh(&xprt->sc_ctxt_lock);
-
- ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-
- spin_lock_bh(&xprt->sc_ctxt_lock);
- if (ctxt) {
- at_least_one = 1;
- INIT_LIST_HEAD(&ctxt->free_list);
- list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
- } else {
- /* kmalloc failed...give up for now */
- xprt->sc_ctxt_cnt--;
+ while (1) {
+ ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
+ if (ctxt)
break;
- }
+ schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
- spin_unlock_bh(&xprt->sc_ctxt_lock);
- dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
- xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
- return at_least_one;
+ ctxt->xprt = xprt;
+ INIT_LIST_HEAD(&ctxt->dto_q);
+ ctxt->count = 0;
+ atomic_inc(&xprt->sc_ctxt_used);
+ return ctxt;
}
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
{
- struct svc_rdma_op_ctxt *ctxt;
-
- while (1) {
- spin_lock_bh(&xprt->sc_ctxt_lock);
- if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
- /* Try to bump my cache. */
- spin_unlock_bh(&xprt->sc_ctxt_lock);
-
- if (rdma_bump_context_cache(xprt))
- continue;
-
- printk(KERN_INFO "svcrdma: sleeping waiting for "
- "context memory on xprt=%p\n",
- xprt);
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- continue;
- }
- ctxt = list_entry(xprt->sc_ctxt_free.next,
- struct svc_rdma_op_ctxt,
- free_list);
- list_del_init(&ctxt->free_list);
- spin_unlock_bh(&xprt->sc_ctxt_lock);
- ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->dto_q);
- ctxt->count = 0;
- atomic_inc(&xprt->sc_ctxt_used);
- break;
+ struct svcxprt_rdma *xprt = ctxt->xprt;
+ int i;
+ for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+ atomic_dec(&xprt->sc_dma_used);
+ ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ctxt->sge[i].addr,
+ ctxt->sge[i].length,
+ ctxt->direction);
}
- return ctxt;
}
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -161,18 +128,36 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
- for (i = 0; i < ctxt->count; i++)
- ib_dma_unmap_single(xprt->sc_cm_id->device,
- ctxt->sge[i].addr,
- ctxt->sge[i].length,
- ctxt->direction);
-
- spin_lock_bh(&xprt->sc_ctxt_lock);
- list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
- spin_unlock_bh(&xprt->sc_ctxt_lock);
+ kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
atomic_dec(&xprt->sc_ctxt_used);
}
+/* Temporary NFS request map cache. Created in svc_rdma.c */
+extern struct kmem_cache *svc_rdma_map_cachep;
+
+/*
+ * Temporary NFS req mappings are shared across all transport
+ * instances. These are short lived and should be bounded by the number
+ * of concurrent server threads * depth of the SQ.
+ */
+struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+{
+ struct svc_rdma_req_map *map;
+ while (1) {
+ map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
+ if (map)
+ break;
+ schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+ }
+ map->count = 0;
+ return map;
+}
+
+void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+{
+ kmem_cache_free(svc_rdma_map_cachep, map);
+}
+
/* ib_cq event handler */
static void cq_event_handler(struct ib_event *event, void *context)
{
@@ -302,6 +287,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
ctxt->wc_status = wc.status;
ctxt->byte_len = wc.byte_len;
+ svc_rdma_unmap_dma(ctxt);
if (wc.status != IB_WC_SUCCESS) {
/* Close the transport */
dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
@@ -351,6 +337,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
xprt = ctxt->xprt;
+ svc_rdma_unmap_dma(ctxt);
if (wc.status != IB_WC_SUCCESS)
/* Close the transport */
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -361,10 +348,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
switch (ctxt->wr_op) {
case IB_WR_SEND:
- case IB_WR_RDMA_WRITE:
svc_rdma_put_context(ctxt, 1);
break;
+ case IB_WR_RDMA_WRITE:
+ svc_rdma_put_context(ctxt, 0);
+ break;
+
case IB_WR_RDMA_READ:
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
@@ -423,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
tasklet_schedule(&dto_tasklet);
}
-static void create_context_cache(struct svcxprt_rdma *xprt,
- int ctxt_count, int ctxt_bump, int ctxt_max)
-{
- struct svc_rdma_op_ctxt *ctxt;
- int i;
-
- xprt->sc_ctxt_max = ctxt_max;
- xprt->sc_ctxt_bump = ctxt_bump;
- xprt->sc_ctxt_cnt = 0;
- atomic_set(&xprt->sc_ctxt_used, 0);
-
- INIT_LIST_HEAD(&xprt->sc_ctxt_free);
- for (i = 0; i < ctxt_count; i++) {
- ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
- if (ctxt) {
- INIT_LIST_HEAD(&ctxt->free_list);
- list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
- xprt->sc_ctxt_cnt++;
- }
- }
-}
-
-static void destroy_context_cache(struct svcxprt_rdma *xprt)
-{
- while (!list_empty(&xprt->sc_ctxt_free)) {
- struct svc_rdma_op_ctxt *ctxt;
- ctxt = list_entry(xprt->sc_ctxt_free.next,
- struct svc_rdma_op_ctxt,
- free_list);
- list_del_init(&ctxt->free_list);
- kfree(ctxt);
- }
-}
-
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
int listener)
{
@@ -473,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_read_complete_lock);
- spin_lock_init(&cma_xprt->sc_ctxt_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
cma_xprt->sc_ord = svcrdma_ord;
@@ -482,21 +437,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
cma_xprt->sc_max_requests = svcrdma_max_requests;
cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
atomic_set(&cma_xprt->sc_sq_count, 0);
+ atomic_set(&cma_xprt->sc_ctxt_used, 0);
- if (!listener) {
- int reqs = cma_xprt->sc_max_requests;
- create_context_cache(cma_xprt,
- reqs << 1, /* starting size */
- reqs, /* bump amount */
- reqs +
- cma_xprt->sc_sq_depth +
- RPCRDMA_MAX_THREADS + 1); /* max */
- if (list_empty(&cma_xprt->sc_ctxt_free)) {
- kfree(cma_xprt);
- return NULL;
- }
- clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
- } else
+ if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
return cma_xprt;
@@ -532,6 +475,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
BUG_ON(sge_no >= xprt->sc_max_sge);
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
+ atomic_inc(&xprt->sc_dma_used);
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
DMA_FROM_DEVICE);
@@ -566,7 +510,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
* will call the recvfrom method on the listen xprt which will accept the new
* connection.
*/
-static void handle_connect_req(struct rdma_cm_id *new_cma_id)
+static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
{
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
struct svcxprt_rdma *newxprt;
@@ -583,6 +527,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
newxprt, newxprt->sc_cm_id, listen_xprt);
+ /* Save client advertised inbound read limit for use later in accept. */
+ newxprt->sc_ord = client_ird;
+
/* Set the local and remote addresses in the transport */
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
@@ -619,7 +566,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_CONNECT_REQUEST:
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
"event=%d\n", cma_id, cma_id->context, event->event);
- handle_connect_req(cma_id);
+ handle_connect_req(cma_id,
+ event->param.conn.responder_resources);
break;
case RDMA_CM_EVENT_ESTABLISHED:
@@ -793,8 +741,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
(size_t)svcrdma_max_requests);
newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
- newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom,
- (size_t)svcrdma_ord);
+ /*
+ * Limit ORD based on client limit, local device limit, and
+ * configured svcrdma limit.
+ */
+ newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+ newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
if (IS_ERR(newxprt->sc_pd)) {
@@ -987,7 +939,6 @@ static void __svc_rdma_free(struct work_struct *work)
* cm_id because the device ptr is needed to unmap the dma in
* svc_rdma_put_context.
*/
- spin_lock_bh(&rdma->sc_read_complete_lock);
while (!list_empty(&rdma->sc_read_complete_q)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_read_complete_q.next,
@@ -996,10 +947,8 @@ static void __svc_rdma_free(struct work_struct *work)
list_del_init(&ctxt->dto_q);
svc_rdma_put_context(ctxt, 1);
}
- spin_unlock_bh(&rdma->sc_read_complete_lock);
/* Destroy queued, but not processed recv completions */
- spin_lock_bh(&rdma->sc_rq_dto_lock);
while (!list_empty(&rdma->sc_rq_dto_q)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_rq_dto_q.next,
@@ -1008,10 +957,10 @@ static void __svc_rdma_free(struct work_struct *work)
list_del_init(&ctxt->dto_q);
svc_rdma_put_context(ctxt, 1);
}
- spin_unlock_bh(&rdma->sc_rq_dto_lock);
/* Warn if we leaked a resource or under-referenced */
WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+ WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1032,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
/* Destroy the CM ID */
rdma_destroy_id(rdma->sc_cm_id);
- destroy_context_cache(rdma);
kfree(rdma);
}
@@ -1132,6 +1080,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
/* Prepare SGE for local address */
+ atomic_inc(&xprt->sc_dma_used);
sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
sge.lkey = xprt->sc_phys_mr->lkey;