From d0687be7c7ae21461da4438d5fd059b48487bfe1 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Fri, 3 Apr 2009 15:18:24 -0500
Subject: svcrdma: Fix dma map direction for rdma read targets

The nfs server rdma transport was mapping rdma read target pages for
TO_DEVICE instead of FROM_DEVICE.  This causes data corruption on non
cache-coherent systems if frmrs are used.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 629a28764da9..42a6f9f20285 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
 		frmr->page_list->page_list[page_no] =
 			ib_dma_map_single(xprt->sc_cm_id->device,
 					  page_address(rqstp->rq_arg.pages[page_no]),
-					  PAGE_SIZE, DMA_TO_DEVICE);
+					  PAGE_SIZE, DMA_FROM_DEVICE);
 		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
 					 frmr->page_list->page_list[page_no]))
 			goto fatal_err;
-- 
cgit v1.2.3


From 21515e46bc6a6279dd13f6c01898ada9720100a3 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 29 Apr 2009 14:14:00 -0500
Subject: svcrdma: clean up error paths.

These fixes resolved crashes due to resource leak BUG_ON checks. The
resource leaks were detected by introducing asynchronous transport errors.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 3 +++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 6c26a675435a..8b510c5e8777 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -183,6 +183,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
 
  fatal_err:
 	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+	vec->frmr = NULL;
 	svc_rdma_put_frmr(xprt, frmr);
 	return -EIO;
 }
@@ -516,6 +517,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		       "svcrdma: could not post a receive buffer, err=%d."
 		       "Closing transport %p.\n", ret, rdma);
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		svc_rdma_put_frmr(rdma, vec->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		return -ENOTCONN;
 	}
@@ -606,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	return 0;
 
  err:
+	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_frmr(rdma, vec->frmr);
 	svc_rdma_put_context(ctxt, 1);
 	return -EIO;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3d810e7df3fb..4b0c2fa15e0b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	svc_xprt_get(&xprt->sc_xprt);
 	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
 	if (ret) {
-		svc_xprt_put(&xprt->sc_xprt);
+		svc_rdma_unmap_dma(ctxt);
 		svc_rdma_put_context(ctxt, 1);
+		svc_xprt_put(&xprt->sc_xprt);
 	}
 	return ret;
 
-- 
cgit v1.2.3


From ccecee1e5e42981f5eb37f4411e8552b9db04d30 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Fri, 1 May 2009 11:33:53 -0400
Subject: nfsd41: slots are freed with session

The session and slots are allocated all in one piece.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c65a27b76a9d..3b711f5147a7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -580,7 +580,6 @@ free_session(struct kref *kref)
 		struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
 		nfsd4_release_respages(e->ce_respages, e->ce_resused);
 	}
-	kfree(ses->se_slots);
 	kfree(ses);
 }
 
-- 
cgit v1.2.3


From b2c0cea6b1cb210e962f07047df602875564069e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 5 May 2009 19:04:29 -0400
Subject: nfsd4: check for negative dentry before use in nfsv4 readdir

After 2f9092e1020246168b1309b35e085ecd7ff9ff72 "Fix i_mutex vs.  readdir
handling in nfsd" (and 14f7dd63 "Copy XFS readdir hack into nfsd code"),
an entry may be removed between the first mutex_unlock and the second
mutex_lock. In this case, lookup_one_len() will return a negative
dentry.  Check for this case to avoid a NULL dereference.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Reviewed-by: J. R. Okajima <hooanon05@yahoo.co.jp>
Cc: stable@kernel.org
---
 fs/nfsd/nfs4xdr.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b820c311931c..b73549d293be 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2214,6 +2214,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 	dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
 	if (IS_ERR(dentry))
 		return nfserrno(PTR_ERR(dentry));
+	if (!dentry->d_inode) {
+		/*
+		 * nfsd_buffered_readdir drops the i_mutex between
+		 * readdir and calling this callback, leaving a window
+		 * where this directory entry could have gone away.
+		 */
+		dput(dentry);
+		return nfserr_noent;
+	}
 
 	exp_get(exp);
 	/*
@@ -2276,6 +2285,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
 	int buflen;
 	__be32 *p = cd->buffer;
+	__be32 *cookiep;
 	__be32 nfserr = nfserr_toosmall;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
@@ -2292,7 +2302,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		goto fail;
 
 	*p++ = xdr_one;                             /* mark entry present */
-	cd->offset = p;                             /* remember pointer */
+	cookiep = p;
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
@@ -2306,6 +2316,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		goto fail;
 	case nfserr_dropit:
 		goto fail;
+	case nfserr_noent:
+		goto skip_entry;
 	default:
 		/*
 		 * If the client requested the RDATTR_ERROR attribute,
@@ -2324,6 +2336,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 	}
 	cd->buflen -= (p - cd->buffer);
 	cd->buffer = p;
+	cd->offset = cookiep;
+skip_entry:
 	cd->common.err = nfs_ok;
 	return 0;
 fail:
-- 
cgit v1.2.3


From 89996df4b5b1a09c279f50b3fd03aa9df735f5cb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 6 May 2009 16:32:54 -0400
Subject: lockd: fix list corruption on lockd restart

If lockd is signalled soon enough after restart then locks_start_grace()
will try to re-add an entry to a list and trigger a lock corruption
warning.

Thanks to Wang Chen for the problem report and diagnosis.

WARNING: at lib/list_debug.c:26 __list_add+0x27/0x5c()
...
list_add corruption. next->prev should be prev (ef8fe958), but was ef8ff128.  (next=ef8ff128).
...
Pid: 23062, comm: lockd Tainted: G        W  2.6.30-rc2 #3
Call Trace:
[<c042d5b5>] warn_slowpath+0x71/0xa0
[<c0422a96>] ? update_curr+0x11d/0x125
[<c044b12d>] ? trace_hardirqs_on_caller+0x18/0x150
[<c044b270>] ? trace_hardirqs_on+0xb/0xd
[<c051c61a>] ? _raw_spin_lock+0x53/0xfa
[<c051c89f>] __list_add+0x27/0x5c
[<ef8f6daa>] locks_start_grace+0x22/0x30 [lockd]
[<ef8f34da>] set_grace_period+0x39/0x53 [lockd]
[<c06b8921>] ? lock_kernel+0x1c/0x28
[<ef8f3558>] lockd+0x64/0x164 [lockd]
[<c044b12d>] ? trace_hardirqs_on_caller+0x18/0x150
[<c04227b0>] ? complete+0x34/0x3e
[<ef8f34f4>] ? lockd+0x0/0x164 [lockd]
[<ef8f34f4>] ? lockd+0x0/0x164 [lockd]
[<c043dd42>] kthread+0x45/0x6b
[<c043dcfd>] ? kthread+0x0/0x6b
[<c0403c23>] kernel_thread_helper+0x7/0x10

Reported-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: stable@kernel.org
---
 fs/lockd/svc.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abf83881f68a..1a54ae14a192 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -104,6 +104,16 @@ static void set_grace_period(void)
 	schedule_delayed_work(&grace_period_end, grace_period);
 }
 
+static void restart_grace(void)
+{
+	if (nlmsvc_ops) {
+		cancel_delayed_work_sync(&grace_period_end);
+		locks_end_grace(&lockd_manager);
+		nlmsvc_invalidate_all();
+		set_grace_period();
+	}
+}
+
 /*
  * This is the lockd kernel thread
  */
@@ -149,10 +159,7 @@ lockd(void *vrqstp)
 
 		if (signalled()) {
 			flush_signals(current);
-			if (nlmsvc_ops) {
-				nlmsvc_invalidate_all();
-				set_grace_period();
-			}
+			restart_grace();
 			continue;
 		}
 
-- 
cgit v1.2.3


From 8daed1e549b55827758b3af7b8132a73fc51526f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 11 May 2009 16:10:19 -0400
Subject: nfsd: silence lockdep warning

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4recover.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5275097a7565..b5348405046b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,7 +229,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
 		goto out;
 	status = vfs_readdir(filp, nfsd4_build_namelist, &names);
 	fput(filp);
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	while (!list_empty(&names)) {
 		entry = list_entry(names.next, struct name_list, list);
 
@@ -264,7 +264,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
 
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
-	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
 	if (IS_ERR(dentry)) {
 		status = PTR_ERR(dentry);
-- 
cgit v1.2.3