ceph: stop forwarding the request when exceeding 256 times

The type of 'num_fwd' in ceph 'MClientRequestForward' is 'int32_t', while in 'ceph_mds_request_head' the type is '__u8'. So in case the request bounces between MDSes exceeding 256 times, the client will get stuck. In this case it's ususally a bug in MDS and continue bouncing the request makes no sense. URL: https://tracker.ceph.com/issues/55130 Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Reviewed-by: Luís Henriques <lhenriques@suse.de> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
author: Xiubo Li <xiubli@redhat.com> 2022-03-29 12:48:01 +0800
committer: Ilya Dryomov <idryomov@gmail.com> 2022-05-25 20:45:13 +0200
commit: 1980b1bf17a4975fee5ee42df167f50f7f67b3d9 (patch)
tree: 85ccf90fb9122521b686bc00e4e11a710da75f31 /fs/ceph
parent: 6c1dc50284c4dab9f1e563efba6f9c4a47af894a (diff)
download: linux-1980b1bf17a4975fee5ee42df167f50f7f67b3d9.tar.bz2
1 files changed, 34 insertions, 5 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 7b9a16dc8353..75506ac30307 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3265,6 +3265,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
 	int err = -EINVAL;
 	void *p = msg->front.iov_base;
 	void *end = p + msg->front.iov_len;
+	bool aborted = false;
 
 	ceph_decode_need(&p, end, 2*sizeof(u32), bad);
 	next_mds = ceph_decode_32(&p);
@@ -3273,16 +3274,41 @@ static void handle_forward(struct ceph_mds_client *mdsc,
 	mutex_lock(&mdsc->mutex);
 	req = lookup_get_request(mdsc, tid);
 	if (!req) {
+		mutex_unlock(&mdsc->mutex);
 		dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
-		goto out;  /* dup reply? */
+		return;  /* dup reply? */
 	}
 
 	if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
 		dout("forward tid %llu aborted, unregistering\n", tid);
 		__unregister_request(mdsc, req);
 	} else if (fwd_seq <= req->r_num_fwd) {
-		dout("forward tid %llu to mds%d - old seq %d <= %d\n",
-		     tid, next_mds, req->r_num_fwd, fwd_seq);
+		/*
+		 * The type of 'num_fwd' in ceph 'MClientRequestForward'
+		 * is 'int32_t', while in 'ceph_mds_request_head' the
+		 * type is '__u8'. So in case the request bounces between
+		 * MDSes exceeding 256 times, the client will get stuck.
+		 *
+		 * In this case it's ususally a bug in MDS and continue
+		 * bouncing the request makes no sense.
+		 *
+		 * In future this could be fixed in ceph code, so avoid
+		 * using the hardcode here.
+		 */
+		int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
+		max = 1 << (max * BITS_PER_BYTE);
+		if (req->r_num_fwd >= max) {
+			mutex_lock(&req->r_fill_mutex);
+			req->r_err = -EMULTIHOP;
+			set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+			mutex_unlock(&req->r_fill_mutex);
+			aborted = true;
+			pr_warn_ratelimited("forward tid %llu seq overflow\n",
+					    tid);
+		} else {
+			dout("forward tid %llu to mds%d - old seq %d <= %d\n",
+			     tid, next_mds, req->r_num_fwd, fwd_seq);
+		}
 	} else {
 		/* resend. forward race not possible; mds would drop */
 		dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
@@ -3294,9 +3320,12 @@ static void handle_forward(struct ceph_mds_client *mdsc,
 		put_request_session(req);
 		__do_request(mdsc, req);
 	}
-	ceph_mdsc_put_request(req);
-out:
 	mutex_unlock(&mdsc->mutex);
+
+	/* kick calling process */
+	if (aborted)
+		complete_request(mdsc, req);
+	ceph_mdsc_put_request(req);
 	return;
 
 bad:
author	Xiubo Li <xiubli@redhat.com>	2022-03-29 12:48:01 +0800
committer	Ilya Dryomov <idryomov@gmail.com>	2022-05-25 20:45:13 +0200
commit	1980b1bf17a4975fee5ee42df167f50f7f67b3d9 (patch)
tree	85ccf90fb9122521b686bc00e4e11a710da75f31 /fs/ceph
parent	6c1dc50284c4dab9f1e563efba6f9c4a47af894a (diff)
download	linux-1980b1bf17a4975fee5ee42df167f50f7f67b3d9.tar.bz2