IB/iser: Fix RNR errors

Some users complain about RNR errors on the target when heavy high-priority tasks run on the initiator. After investigation, we found that the receive WRs were exhausted because the initiator could not post them in time. Receive work requests are posted in chunks to reduce the number of hits to the HCA. The WRs are posted in the receive completion handler when the number of free receive buffers reaches the threshold. But on a heavily loaded host, receive CQE processing can be delayed and all receive WRs will be exhausted. In this case, the target will get an RNR error. To avoid this, we post a receive WR as soon as possible and not in a batch. This increases the number of hits to the HCA, but it is also the common implementation in most Linux ULPs (e.g. NVMe-oF/RDMA). As a rule of thumb, performance improvements and heuristics are being added to the RDMA core layer or vendors' low-level drivers, and it's about time to align iSER as well. Link: https://lore.kernel.org/r/20211215135721.3662-3-mgurtovoy@nvidia.com Signed-off-by: Sergey Gorenko <sergeygo@nvidia.com> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com> Reviewed-by: Israel Rukshin <israelr@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
author: Sergey Gorenko <sergeygo@nvidia.com> 2021-12-15 15:57:17 +0200
committer: Jason Gunthorpe <jgg@nvidia.com> 2022-01-05 19:36:20 -0400
commit: 39b169ea0d36b9c445ab6849002e4edf00c7fcc1 (patch)
tree: 720c41a6fc91f371b00febe70c380062f587da49 /drivers/infiniband/ulp/iser/iser_initiator.c
parent: b28801a08924e887d7e3d33f43f510ccd12bbce8 (diff)
download: linux-39b169ea0d36b9c445ab6849002e4edf00c7fcc1.tar.bz2
1 files changed, 28 insertions, 36 deletions
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 27a6f75a9912..ca22b6d1f5e3 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -247,8 +247,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
 	struct iser_device *device = ib_conn->device;
 
 	iser_conn->qp_max_recv_dtos = session->cmds_max;
-	iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
-	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
 
 	if (iser_alloc_fastreg_pool(ib_conn, session->scsi_cmds_max,
 				    iser_conn->pages_per_mr))
@@ -280,7 +278,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
 		rx_sg->lkey = device->pd->local_dma_lkey;
 	}
 
-	iser_conn->rx_desc_head = 0;
 	return 0;
 
 rx_desc_dma_map_failed:
@@ -322,32 +319,35 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn)
 static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 {
 	struct iser_conn *iser_conn = conn->dd_data;
-	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	struct iscsi_session *session = conn->session;
+	int err = 0;
+	int i;
 
 	iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
 	/* check if this is the last login - going to full feature phase */
 	if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
-		return 0;
-
-	/*
-	 * Check that there is one posted recv buffer
-	 * (for the last login response).
-	 */
-	WARN_ON(ib_conn->post_recv_buf_count != 1);
+		goto out;
 
 	if (session->discovery_sess) {
 		iser_info("Discovery session, re-using login RX buffer\n");
-		return 0;
-	} else
-		iser_info("Normal session, posting batch of RX %d buffers\n",
-			  iser_conn->min_posted_rx);
+		goto out;
+	}
 
-	/* Initial post receive buffers */
-	if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx))
-		return -ENOMEM;
+	iser_info("Normal session, posting batch of RX %d buffers\n",
+		  iser_conn->qp_max_recv_dtos - 1);
 
-	return 0;
+	/*
+	 * Initial post receive buffers.
+	 * There is one already posted recv buffer (for the last login
+	 * response). Therefore, the first recv buffer is skipped here.
+	 */
+	for (i = 1; i < iser_conn->qp_max_recv_dtos; i++) {
+		err = iser_post_recvm(iser_conn, &iser_conn->rx_descs[i]);
+		if (err)
+			goto out;
+	}
+out:
+	return err;
 }
 
 static inline bool iser_signal_comp(u8 sig_count)
@@ -590,7 +590,11 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
 				      desc->rsp_dma, ISER_RX_LOGIN_SIZE,
 				      DMA_FROM_DEVICE);
 
-	ib_conn->post_recv_buf_count--;
+	if (iser_conn->iscsi_conn->session->discovery_sess)
+		return;
+
+	/* Post the first RX buffer that is skipped in iser_post_rx_bufs() */
+	iser_post_recvm(iser_conn, iser_conn->rx_descs);
 }
 
 static inline int
@@ -657,8 +661,7 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
 	struct iser_conn *iser_conn = to_iser_conn(ib_conn);
 	struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
 	struct iscsi_hdr *hdr;
-	int length;
-	int outstanding, count, err;
+	int length, err;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		iser_err_comp(wc, "task_rsp");
@@ -687,20 +690,9 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
 				      desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
 				      DMA_FROM_DEVICE);
 
-	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
-	 * task eliminates the need to worry on tasks which are completed in   *
-	 * parallel to the execution of iser_conn_term. So the code that waits *
-	 * for the posted rx bufs refcount to become zero handles everything   */
-	ib_conn->post_recv_buf_count--;
-
-	outstanding = ib_conn->post_recv_buf_count;
-	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
-		count = min(iser_conn->qp_max_recv_dtos - outstanding,
-			    iser_conn->min_posted_rx);
-		err = iser_post_recvm(iser_conn, count);
-		if (err)
-			iser_err("posting %d rx bufs err %d\n", count, err);
-	}
+	err = iser_post_recvm(iser_conn, desc);
+	if (err)
+		iser_err("posting rx buffer err %d\n", err);
 }
 
 void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
author	Sergey Gorenko <sergeygo@nvidia.com>	2021-12-15 15:57:17 +0200
committer	Jason Gunthorpe <jgg@nvidia.com>	2022-01-05 19:36:20 -0400
commit	39b169ea0d36b9c445ab6849002e4edf00c7fcc1 (patch)
tree	720c41a6fc91f371b00febe70c380062f587da49 /drivers/infiniband/ulp/iser/iser_initiator.c
parent	b28801a08924e887d7e3d33f43f510ccd12bbce8 (diff)
download	linux-39b169ea0d36b9c445ab6849002e4edf00c7fcc1.tar.bz2