From 71d236399160ad9beaae7267b93d2d487e8f19a0 Mon Sep 17 00:00:00 2001 From: "yangx.jy@fujitsu.com" Date: Fri, 21 Oct 2022 13:45:17 +0000 Subject: RDMA/rxe: Remove the member 'type' of struct rxe_mr The member 'type' is included in both struct rxe_mr and struct ib_mr, so remove the duplicate one from struct rxe_mr. Signed-off-by: Xiao Yang Link: https://lore.kernel.org/r/20221021134513.17730-1-yangx.jy@fujitsu.com Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_mr.c | 16 ++++++++-------- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 502e9ada99b3..d4f10c2d1aa7 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -26,7 +26,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { - switch (mr->type) { + switch (mr->ibmr.type) { case IB_MR_TYPE_DMA: return 0; @@ -39,7 +39,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) default: pr_warn("%s: mr type (%d) not supported\n", - __func__, mr->type); + __func__, mr->ibmr.type); return -EFAULT; } } @@ -109,7 +109,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr) mr->access = access; mr->state = RXE_MR_STATE_VALID; - mr->type = IB_MR_TYPE_DMA; + mr->ibmr.type = IB_MR_TYPE_DMA; } int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, @@ -178,7 +178,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, mr->access = access; mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; - mr->type = IB_MR_TYPE_USER; + mr->ibmr.type = IB_MR_TYPE_USER; return 0; @@ -205,7 +205,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; - mr->type = IB_MR_TYPE_MEM_REG; + mr->ibmr.type = IB_MR_TYPE_MEM_REG; return 0; @@ -304,7 +304,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, if (length == 0) return 0; - if (mr->type == IB_MR_TYPE_DMA) { + if (mr->ibmr.type == IB_MR_TYPE_DMA) { u8 *src, *dest; src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova); @@ -547,8 +547,8 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) goto err_drop_ref; } - if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { - pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); + if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) { + pr_warn("%s: mr type (%d) is wrong\n", __func__, mr->ibmr.type); ret = -EINVAL; goto err_drop_ref; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5f5cbfcb3569..22a299b0a9f0 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -304,7 +304,6 @@ struct rxe_mr { u32 lkey; u32 rkey; enum rxe_mr_state state; - enum ib_mr_type type; u32 offset; int access; -- cgit v1.2.3 From 5ebc548f4f54fe971d741d80cd108f1a45c9e88d Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Thu, 13 Oct 2022 10:47:23 +0900 Subject: RDMA/rxe: Make responder handle RDMA Read failures Currently, the responder can reply with packets carrying invalid payloads if it fails to copy messages to the packets. Add error handling in read_reply() to inform the requesting node of the failure.
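For context, the control flow this patch adds can be sketched in a few lines of self-contained userspace C. Everything below is an illustrative stand-in (mock names and error values), not the kernel implementation: on a copy failure the half-built reply is dropped and an RKEY-violation NAK state is returned instead of transmitting a packet with a garbage payload.

#include <stdio.h>
#include <stdlib.h>

enum resp_state { RESPST_DONE, RESPST_ERR_RNR, RESPST_ERR_RKEY_VIOLATION };

/* mock of rxe_mr_copy(): pretend copies larger than 4 KiB fail */
static int mock_mr_copy(char *dst, size_t len)
{
    (void)dst;
    return len > 4096 ? -1 : 0;
}

static enum resp_state read_reply_sketch(size_t payload)
{
    char *skb = malloc(payload);    /* mock reply packet */

    if (!skb)
        return RESPST_ERR_RNR;

    if (mock_mr_copy(skb, payload)) {
        free(skb);    /* drop the half-built reply... */
        return RESPST_ERR_RKEY_VIOLATION;    /* ...and NAK the requester */
    }

    free(skb);    /* the real code would hand the skb to the network stack */
    return RESPST_DONE;
}

int main(void)
{
    printf("1 KiB read -> state %d\n", read_reply_sketch(1024));
    printf("8 KiB read -> state %d\n", read_reply_sketch(8192));
    return 0;
}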
Link: https://lore.kernel.org/r/20221013014724.3786212-1-matsuda-daisuke@fujitsu.com Suggested-by: Li Zhijian Signed-off-by: Daisuke Matsuda Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ed5a09e86417..82b74e926e09 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -809,10 +809,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (!skb) return RESPST_ERR_RNR; - rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_FROM_MR_OBJ); + err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + payload, RXE_FROM_MR_OBJ); if (mr) rxe_put(mr); + if (err) { + kfree_skb(skb); + return RESPST_ERR_RKEY_VIOLATION; + } if (bth_pad(&ack_pkt)) { u8 *pad = payload_addr(&ack_pkt) + payload; -- cgit v1.2.3 From 5ac814e02ece516761d2e244cef93843df911ae0 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Thu, 13 Oct 2022 10:47:24 +0900 Subject: RDMA/rxe: Handle remote errors in the midst of a Read reply sequence Requesting nodes do not handle a reported error correctly if it is generated in the middle of multi-packet Read responses, and the node tries to resend the request endlessly. Let the completer terminate the connection in that case. Link: https://lore.kernel.org/r/20221013014724.3786212-2-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_comp.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index fb0c008af78c..c9170dd99f3a 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -200,6 +200,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp, */ if (pkt->psn == wqe->last_psn) return COMPST_COMP_ACK; + else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE && + (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST || + qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE)) + return COMPST_CHECK_ACK; + else return COMPST_DONE; } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { @@ -228,6 +232,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + /* Check NAK code to handle a remote error */ + if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE) + break; + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { /* read retries of partial data may restart from -- cgit v1.2.3 From 686d348476ee8006087cfcbef591e28f4f91bd8b Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Mon, 24 Oct 2022 03:31:54 +0000 Subject: RDMA/rxe: Remove unnecessary mr testing Before this test, mr has already been passed to rxe_mr_copy(), where it could be dereferenced, so the check is not needed. The only way mr could be NULL is if it reached line 780 below with 'qp->resp.mr = NULL', which is not possible per Bob's explanation[1].
778 if (res->state == rdatm_res_state_new) { 779 if (!res->replay) { 780 mr = qp->resp.mr; 781 qp->resp.mr = NULL; 782 } else { [1] https://lore.kernel.org/lkml/30ff25c4-ce66-eac4-eaa2-64c0db203a19@gmail.com/ Link: https://lore.kernel.org/r/1666582315-2-1-git-send-email-lizhijian@fujitsu.com CC: Bob Pearson Signed-off-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 82b74e926e09..95d372db934d 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -811,8 +811,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), payload, RXE_FROM_MR_OBJ); - if (mr) - rxe_put(mr); + rxe_put(mr); if (err) { kfree_skb(skb); return RESPST_ERR_RKEY_VIOLATION; -- cgit v1.2.3 From 05e88ebb9ecfe9631ccc6483a79b0eabf554da60 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:02 -0500 Subject: RDMA/rxe: Remove redundant header files Remove unneeded include files. Link: https://lore.kernel.org/r/20221021200118.2163-2-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index ec2b7de1c497..3fbaba9eec39 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -4,10 +4,6 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/hardirq.h> - #include "rxe.h" int __rxe_do_task(struct rxe_task *task) -- cgit v1.2.3 From 98a54f170617746b5d09b18b23b295efc7a42a5e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:03 -0500 Subject: RDMA/rxe: Remove init of task locks from rxe_qp.c The calls to spin_lock_init() for the tasklet spinlocks in rxe_qp_init_misc() are redundant since they are initialized in rxe_init_task(). This patch removes them. Link: https://lore.kernel.org/r/20221021200118.2163-3-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a62bab88415c..57c3f05ad15b 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->state_lock); - spin_lock_init(&qp->req.task.state_lock); - spin_lock_init(&qp->resp.task.state_lock); - spin_lock_init(&qp->comp.task.state_lock); - spin_lock_init(&qp->sq.sq_lock); spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); -- cgit v1.2.3 From de669ae8af49ceed0eed44f5b3d51dc62affc5e4 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:04 -0500 Subject: RDMA/rxe: Removed unused name from rxe_task struct The name field in struct rxe_task is never used. This patch removes it.
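As a small concrete illustration of what removing a dead field buys, the mock userspace structs below approximate the old and new layouts; the exact driver layout differs, but the per-object saving is the same idea:

#include <stdbool.h>
#include <stdio.h>

struct task_with_name {        /* rough mock of the old struct rxe_task */
    void *arg;
    int (*func)(void *arg);
    int ret;
    char name[16];             /* written once at init, never read */
    bool destroyed;
};

struct task_without_name {     /* rough mock of the new layout */
    void *arg;
    int (*func)(void *arg);
    int ret;
    bool destroyed;
};

int main(void)
{
    printf("with name[16]: %zu bytes, without: %zu bytes per task\n",
           sizeof(struct task_with_name), sizeof(struct task_without_name));
    return 0;
}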
Link: https://lore.kernel.org/r/20221021200118.2163-4-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 9 +++------ drivers/infiniband/sw/rxe/rxe_task.c | 4 +--- drivers/infiniband/sw/rxe/rxe_task.h | 4 +--- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 57c3f05ad15b..03bd9f3e9956 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -238,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->req_pkts); - rxe_init_task(&qp->req.task, qp, - rxe_requester, "req"); - rxe_init_task(&qp->comp.task, qp, - rxe_completer, "comp"); + rxe_init_task(&qp->req.task, qp, rxe_requester); + rxe_init_task(&qp->comp.task, qp, rxe_completer); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { @@ -288,8 +286,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(&qp->resp.task, qp, - rxe_responder, "resp"); + rxe_init_task(&qp->resp.task, qp, rxe_responder); qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 3fbaba9eec39..0cbba455fefd 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -90,12 +90,10 @@ void rxe_do_task(struct tasklet_struct *t) task->ret = ret; } -int rxe_init_task(struct rxe_task *task, - void *arg, int (*func)(void *), char *name) +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) { task->arg = arg; task->func = func; - snprintf(task->name, sizeof(task->name), "%s", name); task->destroyed = false; tasklet_setup(&task->tasklet, rxe_do_task); diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 7f612a1c68a7..b3dfd970d1dc 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -25,7 +25,6 @@ struct rxe_task { void *arg; int (*func)(void *arg); int ret; - char name[16]; bool destroyed; }; @@ -34,8 +33,7 @@ struct rxe_task { * arg => parameter to pass to fcn * func => function to call until it returns != 0 */ -int rxe_init_task(struct rxe_task *task, - void *arg, int (*func)(void *), char *name); +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)); /* cleanup task */ void rxe_cleanup_task(struct rxe_task *task); -- cgit v1.2.3 From dccb23f6c312e4480fe32ccbc2afac1a5cac7e5e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:05 -0500 Subject: RDMA/rxe: Split rxe_run_task() into two subroutines Split rxe_run_task(task, sched) into rxe_run_task(task) and rxe_sched_task(task). 
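The semantics of the split, roughly: rxe_run_task() executes the work immediately in the caller's context, while rxe_sched_task() defers it (via tasklet_schedule() in the kernel, as the rxe_task.c hunks below show). A minimal userspace analogue, with the tasklet replaced by a one-slot pending queue and all names hypothetical:

#include <stdio.h>

struct task {
    void (*func)(void *arg);
    void *arg;
};

static struct task *pending;           /* stand-in for tasklet_schedule() */

static void run_task(struct task *t)   /* like rxe_run_task(): inline */
{
    t->func(t->arg);
}

static void sched_task(struct task *t) /* like rxe_sched_task(): deferred */
{
    pending = t;
}

static void drain(void)                /* stand-in for softirq execution */
{
    if (pending) {
        pending->func(pending->arg);
        pending = NULL;
    }
}

static void work(void *arg)
{
    printf("work(%s)\n", (const char *)arg);
}

int main(void)
{
    struct task t = { work, "hello" };

    run_task(&t);    /* runs now, in this context */
    sched_task(&t);  /* runs later, when the "scheduler" gets to it */
    drain();
    return 0;
}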
Link: https://lore.kernel.org/r/20221021200118.2163-5-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 19 +++++++++++-------- drivers/infiniband/sw/rxe/rxe_net.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_qp.c | 10 +++++----- drivers/infiniband/sw/rxe/rxe_req.c | 10 +++++----- drivers/infiniband/sw/rxe/rxe_resp.c | 5 ++++- drivers/infiniband/sw/rxe/rxe_task.c | 15 ++++++++++----- drivers/infiniband/sw/rxe/rxe_task.h | 7 +++---- drivers/infiniband/sw/rxe/rxe_verbs.c | 8 ++++---- 8 files changed, 44 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index c9170dd99f3a..66f392810c86 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -118,7 +118,7 @@ void retransmit_timer(struct timer_list *t) if (qp->valid) { qp->comp.timeout = 1; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); } } @@ -132,7 +132,10 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) if (must_sched != 0) rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); - rxe_run_task(&qp->comp.task, must_sched); + if (must_sched) + rxe_sched_task(&qp->comp.task); + else + rxe_run_task(&qp->comp.task); } static inline enum comp_state get_wqe(struct rxe_qp *qp, @@ -313,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } return COMPST_ERROR_RETRY; @@ -460,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } @@ -474,7 +477,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } @@ -520,7 +523,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } } @@ -654,7 +657,7 @@ int rxe_completer(void *arg) if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } state = COMPST_DONE; @@ -722,7 +725,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } goto done; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 35f327b9d4b8..c36cad9c7a66 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -345,7 +345,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) if (unlikely(qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); rxe_put(qp); } @@ -429,7 +429,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); } rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 03bd9f3e9956..3f6d62a80bea 
100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -536,10 +536,10 @@ static void rxe_qp_drain(struct rxe_qp *qp) if (qp->req.state != QP_STATE_DRAINED) { qp->req.state = QP_STATE_DRAIN; if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); else __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } } } @@ -553,13 +553,13 @@ void rxe_qp_error(struct rxe_qp *qp) qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ - rxe_run_task(&qp->resp.task, 1); + rxe_sched_task(&qp->resp.task); if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); else __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } /* called by the modify qp verb */ diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index f63771207970..41f1d84f0acb 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -105,7 +105,7 @@ void rnr_nak_timer(struct timer_list *t) /* request a send queue retry */ qp->req.need_retry = 1; qp->req.wait_for_rnr_timer = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) @@ -608,7 +608,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) * which can lead to a deadlock. So go ahead and complete * it now. */ - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); return 0; } @@ -733,7 +733,7 @@ int rxe_requester(void *arg) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - rxe_run_task(&qp->comp.task, 0); + rxe_run_task(&qp->comp.task); goto done; } payload = mtu; @@ -795,7 +795,7 @@ int rxe_requester(void *arg) rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (err == -EAGAIN) { - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); goto exit; } @@ -817,7 +817,7 @@ err: qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_error; qp->req.state = QP_STATE_ERROR; - rxe_run_task(&qp->comp.task, 0); + rxe_run_task(&qp->comp.task); exit: ret = -EAGAIN; out: diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 95d372db934d..c32bc12cc82f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -91,7 +91,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || (skb_queue_len(&qp->req_pkts) > 1); - rxe_run_task(&qp->resp.task, must_sched); + if (must_sched) + rxe_sched_task(&qp->resp.task); + else + rxe_run_task(&qp->resp.task); } static inline enum resp_states get_req(struct rxe_qp *qp, diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 0cbba455fefd..442b7348acdc 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -123,15 +123,20 @@ void rxe_cleanup_task(struct rxe_task *task) tasklet_kill(&task->tasklet); } -void rxe_run_task(struct rxe_task *task, int sched) +void rxe_run_task(struct rxe_task *task) { if (task->destroyed) return; - if (sched) - tasklet_schedule(&task->tasklet); - else - rxe_do_task(&task->tasklet); + rxe_do_task(&task->tasklet); +} + +void rxe_sched_task(struct rxe_task *task) +{ + if (task->destroyed) + return; + + 
tasklet_schedule(&task->tasklet); } void rxe_disable_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index b3dfd970d1dc..590b1c1d7e7c 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -52,10 +52,9 @@ int __rxe_do_task(struct rxe_task *task); */ void rxe_do_task(struct tasklet_struct *t); -/* run a task, else schedule it to run as a tasklet, The decision - * to run or schedule tasklet is based on the parameter sched. - */ -void rxe_run_task(struct rxe_task *task, int sched); +void rxe_run_task(struct rxe_task *task); + +void rxe_sched_task(struct rxe_task *task); /* keep a task from scheduling */ void rxe_disable_task(struct rxe_task *task); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 88825edc7dce..f2f82efbaf6d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -695,9 +695,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, wr = next; } - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); if (unlikely(qp->req.state == QP_STATE_ERROR)) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); return err; } @@ -719,7 +719,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->is_user) { /* Utilize process context to do protocol processing */ - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); return 0; } else return rxe_post_send_kernel(qp, wr, bad_wr); @@ -759,7 +759,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&rq->producer_lock, flags); if (qp->resp.state == QP_STATE_ERROR) - rxe_run_task(&qp->resp.task, 1); + rxe_sched_task(&qp->resp.task); err1: return err; -- cgit v1.2.3 From dcef28528cce82a82134abd393aa0f38f2edf77e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:06 -0500 Subject: RDMA/rxe: Make rxe_do_task static The subroutine rxe_do_task() is only called in rxe_task.c. This patch makes it static and renames it do_task(). 
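do_task(), now private to rxe_task.c, implements a small rearm state machine: a task poked while already running is marked ARMED and re-run before the active runner exits, so no wakeup is lost. A compact single-threaded userspace model of that idea follows; it is illustrative only (the kernel guards these transitions with a spinlock, as the next patch's hunks show):

#include <stdio.h>

enum { STATE_START, STATE_BUSY, STATE_ARMED };

struct task {
    int state;
    int (*func)(void *arg);    /* returns 0 while there is more work */
    void *arg;
};

static void do_task_model(struct task *t)
{
    int cont;

    if (t->state != STATE_START) {     /* already running somewhere */
        t->state = STATE_ARMED;        /* remember the extra wakeup */
        return;
    }
    t->state = STATE_BUSY;

    do {
        cont = 0;
        while (t->func(t->arg) == 0)
            ;
        if (t->state == STATE_ARMED) { /* work arrived while running */
            t->state = STATE_BUSY;
            cont = 1;
        } else {
            t->state = STATE_START;
        }
    } while (cont);
}

static int drain_three(void *arg)
{
    int *budget = arg;

    printf("budget %d\n", *budget);
    return (*budget)-- > 0 ? 0 : 1;
}

int main(void)
{
    int budget = 3;
    struct task t = { STATE_START, drain_three, &budget };

    do_task_model(&t);
    return 0;
}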
Link: https://lore.kernel.org/r/20221021200118.2163-6-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 6 +++--- drivers/infiniband/sw/rxe/rxe_task.h | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 442b7348acdc..fb953f5195b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -24,7 +24,7 @@ int __rxe_do_task(struct rxe_task *task) * a second caller finds the task already running * but looks just after the last call to func */ -void rxe_do_task(struct tasklet_struct *t) +static void do_task(struct tasklet_struct *t) { int cont; int ret; @@ -96,7 +96,7 @@ int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) task->func = func; task->destroyed = false; - tasklet_setup(&task->tasklet, rxe_do_task); + tasklet_setup(&task->tasklet, do_task); task->state = TASK_STATE_START; spin_lock_init(&task->state_lock); @@ -128,7 +128,7 @@ void rxe_run_task(struct rxe_task *task) if (task->destroyed) return; - rxe_do_task(&task->tasklet); + do_task(&task->tasklet); } void rxe_sched_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 590b1c1d7e7c..99e0173e5c46 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -44,14 +44,6 @@ void rxe_cleanup_task(struct rxe_task *task); */ int __rxe_do_task(struct rxe_task *task); -/* - * common function called by any of the main tasklets - * If there is any chance that there is additional - * work to do someone must reschedule the task before - * leaving - */ -void rxe_do_task(struct tasklet_struct *t); - void rxe_run_task(struct rxe_task *task); void rxe_sched_task(struct rxe_task *task); -- cgit v1.2.3 From 63a18baef2653f59a7c5b990283628bd54d062fd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Oct 2022 15:01:07 -0500 Subject: RDMA/rxe: Rename task->state_lock to task->lock Rename task->state_lock to task->lock. Link: https://lore.kernel.org/r/20221021200118.2163-7-rpearsonhpe@gmail.com Signed-off-by: Ian Ziemba Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 18 +++++++++--------- drivers/infiniband/sw/rxe/rxe_task.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index fb953f5195b8..0208d833a41b 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -31,22 +31,22 @@ static void do_task(struct tasklet_struct *t) struct rxe_task *task = from_tasklet(task, t, tasklet); unsigned int iterations = RXE_MAX_ITERATIONS; - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); switch (task->state) { case TASK_STATE_START: task->state = TASK_STATE_BUSY; - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); break; case TASK_STATE_BUSY: task->state = TASK_STATE_ARMED; fallthrough; case TASK_STATE_ARMED: - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); return; default: - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); pr_warn("%s failed with bad state %d\n", __func__, task->state); return; } @@ -55,7 +55,7 @@ static void do_task(struct tasklet_struct *t) cont = 0; ret =
task->func(task->arg); - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); switch (task->state) { case TASK_STATE_BUSY: if (ret) { @@ -84,7 +84,7 @@ static void do_task(struct tasklet_struct *t) pr_warn("%s failed with bad state %d\n", __func__, task->state); } - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); } while (cont); task->ret = ret; @@ -99,7 +99,7 @@ int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) tasklet_setup(&task->tasklet, do_task); task->state = TASK_STATE_START; - spin_lock_init(&task->state_lock); + spin_lock_init(&task->lock); return 0; } @@ -115,9 +115,9 @@ void rxe_cleanup_task(struct rxe_task *task) task->destroyed = true; do { - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); idle = (task->state == TASK_STATE_START); - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); } while (!idle); tasklet_kill(&task->tasklet); diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 99e0173e5c46..7b88129702ac 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -21,7 +21,7 @@ enum { struct rxe_task { struct tasklet_struct tasklet; int state; - spinlock_t state_lock; /* spinlock for task state */ + spinlock_t lock; void *arg; int (*func)(void *arg); int ret; -- cgit v1.2.3 From 875ab4a8d9a7e559c4aaad28f5886d39923301b7 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 27 Sep 2022 13:53:27 +0800 Subject: RDMA/rxe: Make sure requested access is a subset of {mr,mw}->access We should reject requests with access flags that are not registered by the MR/MW. For example, lookup_mr() should return NULL when the requested access is 0x03 and mr->access is 0x01. Link: https://lore.kernel.org/r/20220927055337.22630-2-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- drivers/infiniband/sw/rxe/rxe_mw.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d4f10c2d1aa7..014c27bba049 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -511,7 +511,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || - mr_pd(mr) != pd || (access && !(access & mr->access)) || + mr_pd(mr) != pd || ((access & mr->access) != access) || mr->state != RXE_MR_STATE_VALID)) { rxe_put(mr); mr = NULL; diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 902b7df7aaed..8df1c9066ed8 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) || - (mw->length == 0) || - (access && !(access & mw->access)) || + (mw->length == 0) || ((access & mw->access) != access) || mw->state != RXE_MW_STATE_VALID)) { rxe_put(mw); return NULL; -- cgit v1.2.3
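The new predicate above is the standard bitmask-subset test: a request passes only if every requested bit is present in the registered access mask. The self-contained sketch below reproduces the commit's 0x03-vs-0x01 example with mock flag values (not the real IB access bits) to show why the old test was too permissive:

#include <stdio.h>

#define ACC_LOCAL_WRITE  0x01    /* mock flags */
#define ACC_REMOTE_READ  0x02

static int old_rejects(int access, int registered)
{
    return access && !(access & registered);     /* any overlap passes */
}

static int new_rejects(int access, int registered)
{
    return (access & registered) != access;      /* must be a subset */
}

int main(void)
{
    int registered = ACC_LOCAL_WRITE;                  /* mr->access = 0x01 */
    int access = ACC_LOCAL_WRITE | ACC_REMOTE_READ;    /* request   = 0x03 */

    /* the old test passes because one bit overlaps; the new test rejects
     * because ACC_REMOTE_READ was never registered
     */
    printf("old rejects: %d, new rejects: %d\n",
           old_rejects(access, registered), new_rejects(access, registered));
    return 0;
}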
From b071850ef62e36b2fc2ec81863f07be857151409 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 27 Oct 2022 07:31:33 +0000 Subject: RDMA/rxe: Remove the duplicate assignment of mr->map_shift mr->map_shift is set to ilog2(RXE_BUF_PER_MAP) in both rxe_mr_init() and rxe_mr_alloc(), so remove the duplicate one in rxe_mr_init(). Link: https://lore.kernel.org/r/1666855893-145-1-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 014c27bba049..bc081002bddc 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -62,7 +62,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) mr->rkey = mr->ibmr.rkey = rkey; mr->state = RXE_MR_STATE_INVALID; - mr->map_shift = ilog2(RXE_BUF_PER_MAP); } static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) -- cgit v1.2.3 From 692373d186205dfb1b56f35f22702412d94d9420 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 28 Oct 2022 15:50:53 +0800 Subject: RDMA/rxe: cleanup some error handling in rxe_verbs.c Instead of 'goto and return', just return directly to simplify the error handling, and avoid some unnecessary return value checks. Link: https://lore.kernel.org/r/20221028075053.3990467-1-xuhaoyue1@hisilicon.com Signed-off-by: Yunsheng Lin Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 80 ++++++++++------------------------- 1 file changed, 23 insertions(+), 57 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f2f82efbaf6d..bcdfdadaebbc 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -238,7 +238,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) { - int err; int i; u32 length; struct rxe_recv_wqe *recv_wqe; @@ -246,15 +245,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) int full; full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); - if (unlikely(full)) { - err = -ENOMEM; - goto err1; - } + if (unlikely(full)) + return -ENOMEM; - if (unlikely(num_sge > rq->max_sge)) { - err = -EINVAL; - goto err1; - } + if (unlikely(num_sge > rq->max_sge)) + return -EINVAL; length = 0; for (i = 0; i < num_sge; i++) @@ -275,9 +270,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); return 0; - -err1: - return err; } static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, @@ -343,10 +335,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (err) return err; - err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); - if (err) - return err; - return 0; + return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -453,11 +442,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) - goto err1; + return err; err = rxe_qp_from_attr(qp, attr, mask, udata); if (err) - goto err1; + return err; if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH)) qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, @@ -465,9 +454,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->attr.dest_qp_num); return 0; - -err1: - return err; } static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -501,24 +487,21 @@ static int validate_send_wr(struct
rxe_qp *qp, const struct ib_send_wr *ibwr, struct rxe_sq *sq = &qp->sq; if (unlikely(num_sge > sq->max_sge)) - goto err1; + return -EINVAL; if (unlikely(mask & WR_ATOMIC_MASK)) { if (length < 8) - goto err1; + return -EINVAL; if (atomic_wr(ibwr)->remote_addr & 0x7) - goto err1; + return -EINVAL; } if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && (length > sq->max_inline))) - goto err1; + return -EINVAL; return 0; - -err1: - return -EINVAL; } static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, @@ -735,14 +718,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { *bad_wr = wr; - err = -EINVAL; - goto err1; + return -EINVAL; } if (unlikely(qp->srq)) { *bad_wr = wr; - err = -EINVAL; - goto err1; + return -EINVAL; } spin_lock_irqsave(&rq->producer_lock, flags); @@ -761,7 +742,6 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (qp->resp.state == QP_STATE_ERROR) rxe_sched_task(&qp->resp.task); -err1: return err; } @@ -826,16 +806,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) err = rxe_cq_chk_attr(rxe, cq, cqe, 0); if (err) - goto err1; - - err = rxe_cq_resize_queue(cq, cqe, uresp, udata); - if (err) - goto err1; - - return 0; + return err; -err1: - return err; + return rxe_cq_resize_queue(cq, cqe, uresp, udata); } static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) @@ -921,26 +894,22 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, struct rxe_mr *mr; mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err2; - } - + if (!mr) + return ERR_PTR(-ENOMEM); rxe_get(pd); mr->ibmr.pd = ibpd; err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) - goto err3; + goto err1; rxe_finalize(mr); return &mr->ibmr; -err3: +err1: rxe_cleanup(mr); -err2: return ERR_PTR(err); } @@ -956,25 +925,22 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, return ERR_PTR(-EINVAL); mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err1; - } + if (!mr) + return ERR_PTR(-ENOMEM); rxe_get(pd); mr->ibmr.pd = ibpd; err = rxe_mr_init_fast(max_num_sg, mr); if (err) - goto err2; + goto err1; rxe_finalize(mr); return &mr->ibmr; -err2: - rxe_cleanup(mr); err1: + rxe_cleanup(mr); return ERR_PTR(err); } -- cgit v1.2.3 From bdf1da5df9da680589a7f74448dd0a94dd3e1446 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Mon, 7 Nov 2022 15:50:57 +0100 Subject: RDMA/siw: Fix immediate work request flush to completion queue Correctly set send queue element opcode during immediate work request flushing in post sendqueue operation, if the QP is in ERROR state. An undefined opcode value results in out-of-bounds access to an array for mapping the opcode between siw internal and RDMA core representation in work completion generation. It resulted in a KASAN BUG report of type 'global-out-of-bounds' during NFSoRDMA testing. This patch further fixes a potential case of a malicious user who may write undefined values for completion queue elements status or opcode, if the CQ is memory mapped to user land. It avoids the same out-of-bounds access to arrays for status and opcode mapping as described above.
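The second half of the fix follows a general defensive rule: any index that userspace can influence must be range-checked before it subscripts a fixed-size mapping table. A hedged, self-contained sketch of the pattern (mock table and sentinel values, not siw's):

#include <stdio.h>

#define NUM_OPCODES 3
#define GENERIC_ERR 99

static const int map_opcode[NUM_OPCODES] = { 10, 11, 12 };    /* mock table */

static int map_safely(unsigned int opcode)
{
    /* clamp before indexing: a user-mapped queue may hold any value */
    if (opcode >= NUM_OPCODES)
        return GENERIC_ERR;
    return map_opcode[opcode];
}

int main(void)
{
    printf("in range: %d, out of range: %d\n",
           map_safely(1), map_safely(1000));
    return 0;
}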
Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Fixes: b0fff7317bb4 ("rdma/siw: completion queue methods") Reported-by: Olga Kornievskaia Reviewed-by: Tom Talpey Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20221107145057.895747-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cq.c | 24 +++++++++++++++++++-- drivers/infiniband/sw/siw/siw_verbs.c | 40 +++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index d68e37859e73..acc7bcd538b5 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { memset(wc, 0, sizeof(*wc)); wc->wr_id = cqe->id; - wc->status = map_cqe_status[cqe->status].ib; - wc->opcode = map_wc_opcode[cqe->opcode]; wc->byte_len = cqe->bytes; /* @@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) wc->wc_flags = IB_WC_WITH_INVALIDATE; } wc->qp = cqe->base_qp; + wc->opcode = map_wc_opcode[cqe->opcode]; + wc->status = map_cqe_status[cqe->status].ib; siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%pK\n", cq->cq_get % cq->num_cqe, cqe->opcode, cqe->flags, (void *)(uintptr_t)cqe->id); + } else { + /* + * A malicious user may set invalid opcode or + * status in the user mmapped CQE array. + * Sanity check and correct values in that case + * to avoid out-of-bounds access to global arrays + * for opcode and status mapping. + */ + u8 opcode = cqe->opcode; + u16 status = cqe->status; + + if (opcode >= SIW_NUM_OPCODES) { + opcode = 0; + status = SIW_WC_GENERAL_ERR; + } else if (status >= SIW_NUM_WC_STATUS) { + status = SIW_WC_GENERAL_ERR; + } + wc->opcode = map_wc_opcode[opcode]; + wc->status = map_cqe_status[status].ib; + } WRITE_ONCE(cqe->flags, 0); cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 3e814cfb298c..906fde1a2a0d 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { - struct siw_sqe sqe = {}; int rv = 0; while (wr) { - sqe.id = wr->wr_id; - sqe.opcode = wr->opcode; - rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + struct siw_sqe sqe = {}; + + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + sqe.opcode = SIW_OP_WRITE; + break; + case IB_WR_RDMA_READ: + sqe.opcode = SIW_OP_READ; + break; + case IB_WR_RDMA_READ_WITH_INV: + sqe.opcode = SIW_OP_READ_LOCAL_INV; + break; + case IB_WR_SEND: + sqe.opcode = SIW_OP_SEND; + break; + case IB_WR_SEND_WITH_IMM: + sqe.opcode = SIW_OP_SEND_WITH_IMM; + break; + case IB_WR_SEND_WITH_INV: + sqe.opcode = SIW_OP_SEND_REMOTE_INV; + break; + case IB_WR_LOCAL_INV: + sqe.opcode = SIW_OP_INVAL_STAG; + break; + case IB_WR_REG_MR: + sqe.opcode = SIW_OP_REG_MR; + break; + default: + rv = -EINVAL; + break; + } + if (!rv) { + sqe.id = wr->wr_id; + rv = siw_sqe_complete(qp, &sqe, 0, + SIW_WC_WR_FLUSH_ERR); + } if (rv) { if (bad_wr) *bad_wr = wr; -- cgit v1.2.3 From 837a55847ead27362aac80aa1cf402459a9757f7 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 7 Nov 2022 14:53:38 +0900 Subject: RDMA/rxe: Implement packet length validation on responder The function
check_length() is supposed to check the length of inbound packets on responder, but it actually has been a stub since the driver was born. Let it check the payload length and the DMA length. Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20221107055338.357184-1-matsuda-daisuke@fujitsu.com Reviewed-by: Li Zhijian Acked-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c32bc12cc82f..382d2053db43 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -393,16 +393,36 @@ static enum resp_states check_resource(struct rxe_qp *qp, static enum resp_states check_length(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { - switch (qp_type(qp)) { - case IB_QPT_RC: - return RESPST_CHK_RKEY; + int mtu = qp->mtu; + u32 payload = payload_size(pkt); + u32 dmalen = reth_len(pkt); - case IB_QPT_UC: - return RESPST_CHK_RKEY; + /* RoCEv2 packets do not have LRH. + * Let's skip checking it. + */ - default: - return RESPST_CHK_RKEY; + if ((pkt->opcode & RXE_START_MASK) && + (pkt->opcode & RXE_END_MASK)) { + /* "only" packets */ + if (payload > mtu) + return RESPST_ERR_LENGTH; + } else if ((pkt->opcode & RXE_START_MASK) || + (pkt->opcode & RXE_MIDDLE_MASK)) { + /* "first" or "middle" packets */ + if (payload != mtu) + return RESPST_ERR_LENGTH; + } else if (pkt->opcode & RXE_END_MASK) { + /* "last" packets */ + if ((payload == 0) || (payload > mtu)) + return RESPST_ERR_LENGTH; + } + + if (pkt->opcode & (RXE_WRITE_MASK | RXE_READ_MASK)) { + if (dmalen > (1 << 31)) + return RESPST_ERR_LENGTH; } + + return RESPST_CHK_RKEY; } static enum resp_states check_rkey(struct rxe_qp *qp, -- cgit v1.2.3 From 4554bac48a8c464ff00136a64efe8847e4da4ea8 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:09:59 -0500 Subject: RDMA/rxe: Add ibdev_dbg macros for rxe Add macros borrowed from siw to call dynamic debug macro ibdev_dbg. Link: https://lore.kernel.org/r/20221103171013.20659-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 30fbdf3bc76a..ab334900fcc3 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -38,6 +38,25 @@ #define RXE_ROCE_V2_SPORT (0xc000) +#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \ + "%s: " fmt, __func__, ##__VA_ARGS__) +#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \ + "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device, \ + "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device, \ + "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device, \ + "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device, \ + "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_cq(cq, fmt, ...) 
ibdev_dbg((cq)->ibcq.device, \ + "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device, \ + "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \ + "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) + void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); -- cgit v1.2.3 From 27c4c520bd3908c095401e93c71e7d3696ae8bdd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:00 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_comp.c Replace calls to pr_xxx() in rxe_comp.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-3-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 66f392810c86..4dca4f8bbb5a 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -114,7 +114,7 @@ void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); - pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index); + rxe_dbg_qp(qp, "retransmit timer fired\n"); if (qp->valid) { qp->comp.timeout = 1; @@ -334,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, return COMPST_ERROR; default: - pr_warn("unexpected nak %x\n", syn); + rxe_dbg_qp(qp, "unexpected nak %x\n", syn); wqe->status = IB_WC_REM_OP_ERR; return COMPST_ERROR; } @@ -345,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, break; default: - pr_warn("unexpected opcode\n"); + rxe_dbg_qp(qp, "unexpected opcode\n"); } return COMPST_ERROR; @@ -598,8 +598,7 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - comp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -746,8 +745,7 @@ int rxe_completer(void *arg) * rnr timer has fired */ qp->req.wait_for_rnr_timer = 1; - pr_debug("qp#%d set rnr nak timer\n", - qp_num(qp)); + rxe_dbg_qp(qp, "set rnr nak timer\n"); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); -- cgit v1.2.3 From 52920f537ab08237bd8a3d30ccbb06a9d57717cf Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:01 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_cq.c Replace calls to pr_xxx() in rxe_cq.c with rxe_dbg_xxx(). 
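What these conversions buy: ibdev_dbg() output is tagged with the emitting device, and with these wrappers also the object type and index, and it is compiled as dynamic debug, so individual callsites can be enabled at runtime (typically through /sys/kernel/debug/dynamic_debug/control) rather than always landing in the log. A userspace approximation of the macro shape, with fprintf standing in for ibdev_dbg() and GNU-style ##__VA_ARGS__ as in the kernel macro:

#include <stdio.h>

struct cq { int index; };    /* mock object with an index, like rxe's elem.index */

/* same shape as rxe_dbg_cq(): prefix with object id and function name */
#define dbg_cq(cq, fmt, ...) \
    fprintf(stderr, "cq#%d %s: " fmt, (cq)->index, __func__, ##__VA_ARGS__)

int main(void)
{
    struct cq cq = { .index = 5 };

    dbg_cq(&cq, "cqe(%d) <= 0\n", -1);    /* -> "cq#5 main: cqe(-1) <= 0" */
    return 0;
}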
Link: https://lore.kernel.org/r/20221103171013.20659-4-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_cq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index b1a0ab3cd4bd..1df186534639 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, int count; if (cqe <= 0) { - pr_warn("cqe(%d) <= 0\n", cqe); + rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe); goto err1; } if (cqe > rxe->attr.max_cqe) { - pr_debug("cqe(%d) > max_cqe(%d)\n", + rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n", cqe, rxe->attr.max_cqe); goto err1; } @@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, if (cq) { count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { - pr_debug("cqe(%d) < current # elements in queue (%d)", + rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)", cqe, count); goto err1; } @@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->queue = rxe_queue_init(rxe, &cqe, sizeof(struct rxe_cqe), type); if (!cq->queue) { - pr_warn("unable to create cq\n"); + rxe_dbg(rxe, "unable to create cq\n"); return -ENOMEM; } -- cgit v1.2.3 From 2778b72b1df0c8ef61e0b3b3ef1c8c62eb03fa75 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:02 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mr.c Replace calls to pr_xxx() in rxe_mr.c by rxe_dbg_mr(). Link: https://lore.kernel.org/r/20221103171013.20659-5-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 40 +++++++++++++++-------------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++ 2 files changed, 20 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index bc081002bddc..cd846cf82a84 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -38,8 +38,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) return 0; default: - pr_warn("%s: mr type (%d) not supported\n", - __func__, mr->ibmr.type); + rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type); return -EFAULT; } } @@ -125,8 +124,8 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { - pr_warn("%s: Unable to pin memory region err = %d\n", - __func__, (int)PTR_ERR(umem)); + rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n", + (int)PTR_ERR(umem)); err = PTR_ERR(umem); goto err_out; } @@ -137,8 +136,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("%s: Unable to allocate memory for map\n", - __func__); + rxe_dbg_mr(mr, "Unable to allocate memory for map\n"); goto err_release_umem; } @@ -159,8 +157,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("%s: Unable to get virtual address\n", - __func__); + rxe_dbg_mr(mr, "Unable to get virtual address\n"); err = -ENOMEM; goto err_cleanup_map; } @@ -255,7 +252,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) void *addr; if (mr->state 
!= RXE_MR_STATE_VALID) { - pr_warn("mr not in valid state\n"); + rxe_dbg_mr(mr, "Not in valid state\n"); addr = NULL; goto out; } @@ -266,7 +263,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) } if (mr_check_range(mr, iova, length)) { - pr_warn("range violation\n"); + rxe_dbg_mr(mr, "Range violation\n"); addr = NULL; goto out; } @@ -274,7 +271,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) lookup_iova(mr, iova, &m, &n, &offset); if (offset + length > mr->map[m]->buf[n].size) { - pr_warn("crosses page boundary\n"); + rxe_dbg_mr(mr, "Crosses page boundary\n"); addr = NULL; goto out; } @@ -527,27 +524,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - pr_err("%s: No MR for key %#x\n", __func__, key); + rxe_dbg_mr(mr, "No MR for key %#x\n", key); ret = -EINVAL; goto err; } if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { - pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n", - __func__, key, (mr->rkey ? mr->rkey : mr->lkey)); + rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n", + key, (mr->rkey ? mr->rkey : mr->lkey)); ret = -EINVAL; goto err_drop_ref; } if (atomic_read(&mr->num_mw) > 0) { - pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n", - __func__); + rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n"); ret = -EINVAL; goto err_drop_ref; } if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) { - pr_warn("%s: mr type (%d) is wrong\n", __func__, mr->ibmr.type); + rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type); ret = -EINVAL; goto err_drop_ref; } @@ -576,22 +572,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) /* user can only register MR in free state */ if (unlikely(mr->state != RXE_MR_STATE_FREE)) { - pr_warn("%s: mr->lkey = 0x%x not free\n", - __func__, mr->lkey); + rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey); return -EINVAL; } /* user can only register mr with qp in same protection domain */ if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { - pr_warn("%s: qp->pd and mr->pd don't match\n", - __func__); + rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n"); return -EINVAL; } /* user is only allowed to change key portion of l/rkey */ if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { - pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", - __func__, key, mr->lkey); + rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n", + key, mr->lkey); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index bcdfdadaebbc..510ae471ac7a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -875,6 +875,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; rxe_mr_init_dma(access, mr); rxe_finalize(mr); @@ -899,6 +900,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) @@ -930,6 +932,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; err = rxe_mr_init_fast(max_num_sg, mr); if (err) -- cgit v1.2.3 From e8a87efdf87455454d0a14fd486c679769bfeee2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:03 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mw.c 
Replace calls to pr_xxx() in rxe_mw.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-6-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mw.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 8df1c9066ed8..afa5ce1a7116 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, { if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a type 1 MW not in the valid state\n"); return -EINVAL; } /* o10-36.2.2 */ if (unlikely((mw->access & IB_ZERO_BASED))) { - pr_err_once("attempt to bind a zero based type 1 MW\n"); + rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n"); return -EINVAL; } } @@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (mw->ibmw.type == IB_MW_TYPE_2) { /* o10-37.2.30 */ if (unlikely(mw->state != RXE_MW_STATE_FREE)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a type 2 MW not in the free state\n"); return -EINVAL; } /* C10-72 */ if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind type 2 MW with qp with different PD\n"); return -EINVAL; } /* o10-37.2.40 */ if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n"); return -EINVAL; } @@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, return 0; if (unlikely(mr->access & IB_ZERO_BASED)) { - pr_err_once("attempt to bind MW to zero based MR\n"); + rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n"); return -EINVAL; } /* C10-73 */ if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind an MW to an MR without bind access\n"); return -EINVAL; } @@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (unlikely((mw->access & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) && !(mr->access & IB_ACCESS_LOCAL_WRITE))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind an Writable MW to an MR without local write access\n"); return -EINVAL; } @@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, /* C10-75 */ if (mw->access & IB_ZERO_BASED) { if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } @@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > (mr->ibmr.iova + mr->ibmr.length)))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; } -- cgit v1.2.3 From 34549e88e0a3088416177023abf1232fe40e721c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:04 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_net.c Replace (some) calls to pr_xxx() in rxe_net.c with rxe_dbg_xxx(). Calls with a rxe device not yet in scope are left as is.
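Threading the qp into the route-lookup helpers (as the hunks below do for rxe_find_route4/6) is what lets the new messages name the affected object. The general pattern, sketched with hypothetical names in plain C, is to pass a context pointer down so failures become attributable:

#include <stdio.h>

struct qp { int index; };

/* before: the helper can only log anonymously */
static int find_route_old(const char *daddr)
{
    fprintf(stderr, "no route to %s\n", daddr);
    return -1;
}

/* after: the caller's context rides along, so the log names the qp */
static int find_route_new(struct qp *qp, const char *daddr)
{
    fprintf(stderr, "qp#%d: no route to %s\n", qp->index, daddr);
    return -1;
}

int main(void)
{
    struct qp qp = { .index = 7 };

    find_route_old("192.0.2.1");
    find_route_new(&qp, "192.0.2.1");
    return 0;
}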
Link: https://lore.kernel.org/r/20221103171013.20659-7-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c36cad9c7a66..e02e1624bcf4 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,9 +20,10 @@ static struct rxe_recv_sockets recv_sockets; -static struct dst_entry *rxe_find_route4(struct net_device *ndev, - struct in_addr *saddr, - struct in_addr *daddr) +static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, + struct net_device *ndev, + struct in_addr *saddr, + struct in_addr *daddr) { struct rtable *rt; struct flowi4 fl = { { 0 } }; @@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev, rt = ip_route_output_key(&init_net, &fl); if (IS_ERR(rt)) { - pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); + rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); return NULL; } @@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *rxe_find_route6(struct net_device *ndev, +static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, + struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, recv_sockets.sk6->sk, &fl6, NULL); if (IS_ERR(ndst)) { - pr_err_ratelimited("no route to %pI6\n", daddr); + rxe_dbg_qp(qp, "no route to %pI6\n", daddr); return NULL; } if (unlikely(ndst->error)) { - pr_err("no route to %pI6\n", daddr); + rxe_dbg_qp(qp, "no route to %pI6\n", daddr); goto put; } @@ -77,7 +79,8 @@ put: #else -static struct dst_entry *rxe_find_route6(struct net_device *ndev, +static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, + struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; - dst = rxe_find_route4(ndev, saddr, daddr); + dst = rxe_find_route4(qp, ndev, saddr, daddr); } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; - dst = rxe_find_route6(ndev, saddr6, daddr6); + dst = rxe_find_route6(qp, ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = @@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, dst = rxe_find_route(skb->dev, qp, av); if (!dst) { - pr_err("Host not reachable\n"); + rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } @@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, dst = rxe_find_route(skb->dev, qp, av); if (!dst) { - pr_err("Host not reachable\n"); + rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } @@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } else if (skb->protocol == htons(ETH_P_IPV6)) { err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } else { - pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); + rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n", + skb->protocol); 
atomic_dec(&pkt->qp->skb_out); rxe_put(pkt->qp); kfree_skb(skb); @@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } if (unlikely(net_xmit_eval(err))) { - pr_debug("error sending packet: %d\n", err); + rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err); return -EAGAIN; } @@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, if ((is_request && (qp->req.state != QP_STATE_READY)) || (!is_request && (qp->resp.state != QP_STATE_READY))) { - pr_info("Packet dropped. QP is not in ready state\n"); + rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n"); goto drop; } @@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); + rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_CHANGE: @@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - pr_info("ignoring netdev event = %ld for %s\n", + rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n", event, ndev->name); break; } -- cgit v1.2.3 From 6af70060d2e561d6479b33fe94cc2f38582e830b Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:05 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_qp.c Replace calls to pr_xxx() in rxe_qp.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-8-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 65 ++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 35 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 3f6d62a80bea..bcbfe6068b8b 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_debug("invalid send wr = %u > %d\n", + rxe_dbg(rxe, "invalid send wr = %u > %d\n", cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_debug("invalid send sge = %u > %d\n", + rxe_dbg(rxe, "invalid send sge = %u > %d\n", cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_debug("invalid recv wr = %u > %d\n", + rxe_dbg(rxe, "invalid recv wr = %u > %d\n", cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_debug("invalid recv sge = %u > %d\n", + rxe_dbg(rxe, "invalid recv sge = %u > %d\n", cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_debug("invalid max inline data = %u > %d\n", + rxe_dbg(rxe, "invalid max inline data = %u > %d\n", cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - pr_debug("missing cq\n"); + rxe_dbg(rxe, "missing cq\n"); goto err1; } @@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_debug("invalid port = %d\n", port_num); + rxe_dbg(rxe, "invalid port = %d\n", 
port_num); goto err1; } port = &rxe->port; if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_debug("GSI QP exists for port %d\n", port_num); + rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num); goto err1; } } @@ -264,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, wqe_size = rcv_wqe_size(qp->rq.max_sge); - pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - type = QUEUE_TYPE_FROM_CLIENT; qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, type); @@ -395,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_debug("invalid mask or state for qp\n"); + rxe_dbg_qp(qp, "invalid mask or state\n"); goto err1; } @@ -409,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_debug("invalid port %d\n", attr->port_num); + rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num); goto err1; } } @@ -424,11 +421,11 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_debug("invalid alt port %d\n", attr->alt_port_num); + rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_debug("invalid QP alt timeout %d > 31\n", + rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n", attr->alt_timeout); goto err1; } @@ -441,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, enum ib_mtu mtu = attr->path_mtu; if (mtu > max_mtu) { - pr_debug("invalid mtu (%d) > (%d)\n", + rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n", ib_mtu_enum_to_int(mtu), ib_mtu_enum_to_int(max_mtu)); goto err1; @@ -450,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - pr_debug("invalid max_rd_atomic %d > %d\n", + rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n", attr->max_rd_atomic, rxe->attr.max_qp_rd_atom); goto err1; @@ -459,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { - pr_debug("invalid QP timeout %d > 31\n", attr->timeout); + rxe_dbg_qp(qp, "invalid timeout %d > 31\n", + attr->timeout); goto err1; } } @@ -637,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("qp#%d set retry count = %d\n", qp_num(qp), - attr->retry_cnt); + rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = attr->rnr_retry; - pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), - attr->rnr_retry); + rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), - qp->resp.psn); + rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), + rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n", attr->min_rnr_timer); 
} @@ -665,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); + rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -679,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, switch (attr->qp_state) { case IB_QPS_RESET: - pr_debug("qp#%d state -> RESET\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RESET\n"); rxe_qp_reset(qp); break; case IB_QPS_INIT: - pr_debug("qp#%d state -> INIT\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> INIT\n"); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; qp->comp.state = QP_STATE_INIT; break; case IB_QPS_RTR: - pr_debug("qp#%d state -> RTR\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RTR\n"); qp->resp.state = QP_STATE_READY; break; case IB_QPS_RTS: - pr_debug("qp#%d state -> RTS\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RTS\n"); qp->req.state = QP_STATE_READY; qp->comp.state = QP_STATE_READY; break; case IB_QPS_SQD: - pr_debug("qp#%d state -> SQD\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> SQD\n"); rxe_qp_drain(qp); break; case IB_QPS_SQE: - pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> SQE !!?\n"); /* Not possible from modify_qp. */ break; case IB_QPS_ERR: - pr_debug("qp#%d state -> ERR\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> ERR\n"); rxe_qp_error(qp); break; } @@ -752,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) attr->sq_draining = 0; } - pr_debug("attr->sq_draining = %d\n", attr->sq_draining); + rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining); return 0; } @@ -764,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp) * will fail immediately. */ if (atomic_read(&qp->mcg_num)) { - pr_debug("Attempt to destroy QP while attached to multicast group\n"); + rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n"); return -EBUSY; } -- cgit v1.2.3 From 0edfb15e30a53e8bd039c35a17f61e49cba9f4dd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:06 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_req.c Replace calls to pr_xxx() in rxe_req.c with rxe_dbg_xxx(). 
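The rxe_qp.c conversion above touches the state-transition messages in rxe_qp_from_attr(), which fire whenever a ULP walks a QP through the usual RESET -> INIT -> RTR -> RTS ladder. A hedged sketch of the caller side using the kernel verbs API (attribute values are illustrative and error handling is elided):

/* Each successful step below now logs one per-QP "state -> ..." line. */
struct ib_qp_attr attr = {};
int err;

attr.qp_state = IB_QPS_INIT;
attr.port_num = 1;
attr.pkey_index = 0;
attr.qp_access_flags = IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
err = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT |
		   IB_QP_PKEY_INDEX | IB_QP_ACCESS_FLAGS);

/* ... an RTR step setting the path, MTU and rq_psn goes here ... */

attr.qp_state = IB_QPS_RTS;
attr.sq_psn = 0;
attr.timeout = 14;
attr.retry_cnt = 7;
attr.rnr_retry = 7;
attr.max_rd_atomic = 1;
err = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN |
		   IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
		   IB_QP_MAX_QP_RD_ATOMIC);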
Link: https://lore.kernel.org/r/20221103171013.20659-9-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 41f1d84f0acb..4d45f508392f 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -100,7 +100,7 @@ void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); - pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + rxe_dbg_qp(qp, "nak timer fired\n"); /* request a send queue retry */ qp->req.need_retry = 1; @@ -595,7 +595,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) } break; default: - pr_err("Unexpected send wqe opcode %d\n", opcode); + rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode); wqe->status = IB_WC_LOC_QP_OP_ERR; return -EINVAL; } @@ -748,14 +748,14 @@ int rxe_requester(void *arg) av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { - pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + rxe_dbg_qp(qp, "Failed no address vector\n"); wqe->status = IB_WC_LOC_QP_OP_ERR; goto err; } skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { - pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); + rxe_dbg_qp(qp, "Failed allocating skb\n"); wqe->status = IB_WC_LOC_QP_OP_ERR; if (ah) rxe_put(ah); @@ -764,7 +764,7 @@ int rxe_requester(void *arg) err = finish_packet(qp, av, wqe, &pkt, skb, payload); if (unlikely(err)) { - pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); + rxe_dbg_qp(qp, "Error during finish packet\n"); if (err == -EFAULT) wqe->status = IB_WC_LOC_PROT_ERR; else -- cgit v1.2.3 From 74ddf7233c571d51bcb802bb192a9f7d77cd8830 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:07 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_resp.c Replace calls to pr_xxx() in rxe_resp.c with rxe_dbg_xxx(). 
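The rnr_nak_timer() hunk in the rxe_req.c conversion above relies on a timer_list embedded in struct rxe_qp, which is what lets from_timer() recover the QP. A sketch of that wiring, assuming the standard kernel timer idiom (the delay value is illustrative):

/* At QP init time: bind the embedded timer to its callback. */
timer_setup(&qp->rnr_nak_timer, rnr_nak_timer, 0);

/* When a RNR NAK arrives: arm it for the advertised delay. */
mod_timer(&qp->rnr_nak_timer, jiffies + rnr_delay_jiffies); /* illustrative delay */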
Link: https://lore.kernel.org/r/20221103171013.20659-10-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 382d2053db43..6761bcd1d4d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -317,7 +317,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) /* don't trust user space data */ if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); - pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); + rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n"); return RESPST_ERR_MALFORMED_WQE; } size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); @@ -473,15 +473,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (rkey_is_mw(rkey)) { mw = rxe_lookup_mw(qp, access, rkey); if (!mw) { - pr_debug("%s: no MW matches rkey %#x\n", - __func__, rkey); + rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } mr = mw->mr; if (!mr) { - pr_err("%s: MW doesn't have an MR\n", __func__); + rxe_dbg_qp(qp, "MW doesn't have an MR\n"); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -494,8 +493,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { - pr_debug("%s: no MR matches rkey %#x\n", - __func__, rkey); + rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -1064,7 +1062,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending %s\n", msg); + rxe_dbg_qp(qp, "Failed sending %s\n", msg); return err; } @@ -1310,8 +1308,7 @@ int rxe_responder(void *arg) } while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - resp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); @@ -1468,7 +1465,7 @@ int rxe_responder(void *arg) case RESPST_ERROR: qp->resp.goto_error = 0; - pr_debug("qp#%d moved to error state\n", qp_num(qp)); + rxe_dbg_qp(qp, "moved to error state\n"); rxe_qp_error(qp); goto exit; -- cgit v1.2.3 From 0e6090024b3ebf8d162f82c542eba9632a9c85fc Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:08 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_srq.c Replace calls to pr_xxx() in rxe_srq.c with rxe_dbg_xxx(). 
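The check_rkey() hunks above dispatch on whether an rkey names a memory window or a memory region. In rxe, a key packs the pool index into its upper bits with an 8-bit consumer-visible byte in the low bits, so a lookup is a pool fetch plus a low-byte compare. A hedged sketch of the idiom (not the exact lookup_mr() body):

/* Sketch: resolve an rkey to an MR and reject stale keys. */
static struct rxe_mr *lookup_mr_sketch(struct rxe_dev *rxe, u32 rkey)
{
	struct rxe_mr *mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);

	if (mr && unlikely(mr->rkey != rkey)) {
		rxe_put(mr);	/* drop the pool reference on mismatch */
		return NULL;
	}
	return mr;
}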
Link: https://lore.kernel.org/r/20221103171013.20659-11-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_srq.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 02b39498c370..82e37a41ced4 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) struct ib_srq_attr *attr = &init->attr; if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } @@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) attr->max_wr = RXE_MIN_SRQ_WR; if (attr->max_sge > rxe->attr.max_srq_sge) { - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n", attr->max_sge, rxe->attr.max_srq_sge); goto err1; } @@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, type = QUEUE_TYPE_FROM_CLIENT; q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { - pr_warn("unable to allocate queue for srq\n"); + rxe_dbg_srq(srq, "Unable to allocate queue\n"); return -ENOMEM; } @@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) { if (srq->error) { - pr_warn("srq in error state\n"); + rxe_dbg_srq(srq, "in error state\n"); goto err1; } if (mask & IB_SRQ_MAX_WR) { if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } if (srq->limit && (attr->max_wr < srq->limit)) { - pr_warn("max_wr (%d) < srq->limit (%d)\n", + rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n", attr->max_wr, srq->limit); goto err1; } @@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, if (mask & IB_SRQ_LIMIT) { if (attr->srq_limit > rxe->attr.max_srq_wr) { - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n", attr->srq_limit, rxe->attr.max_srq_wr); goto err1; } if (attr->srq_limit > srq->rq.queue->buf->index_mask) { - pr_warn("srq_limit (%d) > cur limit(%d)\n", + rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n", attr->srq_limit, srq->rq.queue->buf->index_mask); goto err1; -- cgit v1.2.3 From 14e501fdb0de3b45b8ee8a2a031c3c64aaa01817 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:09 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_verbs.c Replace calls to pr_xxx() in rxe_verbs.c with rxe_dbg_xxx(). 
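The rxe_srq_chk_attr() hunks above validate exactly what a ULP passes to the modify-SRQ verb; a hedged sketch of the caller side (values are illustrative):

/* Out-of-range values here take the rxe_dbg_srq() error paths above. */
struct ib_srq_attr attr = {
	.max_wr    = 128,	/* must not exceed the device's max_srq_wr */
	.srq_limit = 16,	/* arms the SRQ limit event */
};
int err;

err = ib_modify_srq(srq, &attr, IB_SRQ_MAX_WR | IB_SRQ_LIMIT);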
Link: https://lore.kernel.org/r/20221103171013.20659-12-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 510ae471ac7a..e6eca21c54e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1103,7 +1103,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) err = ib_register_device(dev, ibdev_name, NULL); if (err) - pr_warn("%s failed with error %d\n", __func__, err); + rxe_dbg(rxe, "failed with error %d\n", err); /* * Note that rxe may be invalid at this point if another thread -- cgit v1.2.3 From 25fd735a4c9e38c65904eea2769ed92f6f8586d0 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:10 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_av.c Replace calls to pr_xxx() in rxe_av.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-13-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_av.c | 43 +++++++++++++++++++++++++++++------ drivers/infiniband/sw/rxe/rxe_loc.h | 8 ++----- drivers/infiniband/sw/rxe/rxe_qp.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_verbs.c | 13 ++++++----- 4 files changed, 47 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 3b05314ca739..889d7adbd455 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av) memcpy(av->dmac, attr->roce.dmac, ETH_ALEN); } -int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) +static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah) { const struct ib_global_route *grh = rdma_ah_read_grh(attr); struct rxe_port *port; + struct rxe_dev *rxe; + struct rxe_qp *qp; + struct rxe_ah *ah; int type; + if (obj_is_ah) { + ah = obj; + rxe = to_rdev(ah->ibah.device); + } else { + qp = obj; + rxe = to_rdev(qp->ibqp.device); + } + port = &rxe->port; if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) { if (grh->sgid_index > port->attr.gid_tbl_len) { - pr_warn("invalid sgid index = %d\n", - grh->sgid_index); + if (obj_is_ah) + rxe_dbg_ah(ah, "invalid sgid index = %d\n", + grh->sgid_index); + else + rxe_dbg_qp(qp, "invalid sgid index = %d\n", + grh->sgid_index); return -EINVAL; } type = rdma_gid_attr_network_type(grh->sgid_attr); if (type < RDMA_NETWORK_IPV4 || type > RDMA_NETWORK_IPV6) { - pr_warn("invalid network type for rdma_rxe = %d\n", - type); + if (obj_is_ah) + rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n", + type); + else + rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n", + type); return -EINVAL; } } @@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) return 0; } +int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr) +{ + return chk_attr(qp, attr, false); +} + +int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr) +{ + return chk_attr(ah, attr, true); +} + void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr) { @@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) /* only new user provider or kernel client */ ah = 
rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); if (!ah) { - pr_warn("Unable to find AH matching ah_num\n"); + rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n"); return NULL; } if (rxe_ah_pd(ah) != pkt->qp->pd) { - pr_warn("PDs don't match for AH and QP\n"); + rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n"); rxe_put(ah); return NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index c2a5c8814a48..a22476d27b38 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -9,16 +9,12 @@ /* rxe_av.c */ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av); - -int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr); - +int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr); +int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr); void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr); - void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); - void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); - struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index bcbfe6068b8b..46f6c74ce00e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -414,11 +414,11 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) goto err1; - if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) + if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr)) goto err1; if (mask & IB_QP_ALT_PATH) { - if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) + if (rxe_av_chk_attr(qp, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e6eca21c54e6..025b35bf014e 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah, ah->is_user = false; } - err = rxe_av_chk_attr(rxe, init_attr->ah_attr); - if (err) - return err; - err = rxe_add_to_pool_ah(&rxe->ah_pool, ah, init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); if (err) @@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah, /* create index > 0 */ ah->ah_num = ah->elem.index; + err = rxe_ah_chk_attr(ah, init_attr->ah_attr); + if (err) { + rxe_cleanup(ah); + return err; + } + if (uresp) { /* only if new user provider */ err = copy_to_user(&uresp->ah_num, &ah->ah_num, @@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah, static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) { int err; - struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); - err = rxe_av_chk_attr(rxe, attr); + err = rxe_ah_chk_attr(ah, attr); if (err) return err; -- cgit v1.2.3 From fc50597934411170ed149d3368b0b733bc5119f1 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:11 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_task.c Replace calls to pr_xxx() in rxe_task.c with rxe_dbg_xxx(). 
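The rxe_task.c conversion recovers the owning QP from task->arg in the do_task() hunks that follow; this works because each rxe task (requester, completer, responder) is created with its QP as the argument. A sketch of the wiring behind from_tasklet(), assuming the usual embedded-tasklet idiom:

/* struct rxe_task embeds a tasklet_struct, so from_tasklet() can map
 * the tasklet back to the task; arg points at the owning QP.
 */
tasklet_setup(&task->tasklet, do_task);
task->arg = qp;	/* later handed to rxe_requester()/rxe_responder() */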
Link: https://lore.kernel.org/r/20221103171013.20659-14-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_task.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 0208d833a41b..60b90e33a884 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -29,6 +29,7 @@ static void do_task(struct tasklet_struct *t) int cont; int ret; struct rxe_task *task = from_tasklet(task, t, tasklet); + struct rxe_qp *qp = (struct rxe_qp *)task->arg; unsigned int iterations = RXE_MAX_ITERATIONS; spin_lock_bh(&task->lock); @@ -47,7 +48,7 @@ static void do_task(struct tasklet_struct *t) default: spin_unlock_bh(&task->lock); - pr_warn("%s failed with bad state %d\n", __func__, task->state); + rxe_dbg_qp(qp, "failed with bad state %d\n", task->state); return; } @@ -81,8 +82,8 @@ static void do_task(struct tasklet_struct *t) break; default: - pr_warn("%s failed with bad state %d\n", __func__, - task->state); + rxe_dbg_qp(qp, "failed with bad state %d\n", + task->state); } spin_unlock_bh(&task->lock); } while (cont); -- cgit v1.2.3 From c6aba5ea00550017629c56972b635f33bb6a6903 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:12 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe.c Replace calls to pr_xxx() in rxe.c with rxe_dbg_xxx(). Calls with a rxe device not yet in scope are left as is. Link: https://lore.kernel.org/r/20221103171013.20659-15-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 51daac5c4feb..136c2efe3466 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) exists = rxe_get_dev_from_net(ndev); if (exists) { ib_device_put(&exists->ib_dev); - pr_err("already configured on %s\n", ndev->name); + rxe_dbg(exists, "already configured on %s\n", ndev->name); err = -EEXIST; goto err; } err = rxe_net_add(ibdev_name, ndev); if (err) { - pr_err("failed to add %s\n", ndev->name); + rxe_dbg(exists, "failed to add %s\n", ndev->name); goto err; } err: -- cgit v1.2.3 From 813728043b79a11f7029ba196decfc7f576ae487 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:13 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_icrc.c Replace calls to pr_xxx() in rxe_icrc.c with rxe_dbg_xxx(). 
Link: https://lore.kernel.org/r/20221103171013.20659-16-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_icrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index 46bb07c5c4df..71bc2c189588 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe) tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { - pr_warn("failed to init crc32 algorithm err:%ld\n", + rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } @@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) *(__be32 *)shash_desc_ctx(shash) = crc; err = crypto_shash_update(shash, next, len); if (unlikely(err)) { - pr_warn_ratelimited("failed crc calculation, err: %d\n", err); + rxe_dbg(rxe, "failed crc calculation, err: %d\n", err); return (__force __be32)crc32_le((__force u32)crc, next, len); } -- cgit v1.2.3 From 5de087250f1d8f7b81abaf94110884994793f073 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 3 Nov 2022 12:10:14 -0500 Subject: RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mmap.c Replace calls to pr_xxx() in rxe_mmap.c with rxe_dbg_xxx(). Link: https://lore.kernel.org/r/20221103171013.20659-17-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 9149b6095429..a47d72dbc537 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /* Don't allow a mmap larger than the object. */ if (size > ip->info.size) { - pr_err("mmap region is larger than the object!\n"); + rxe_dbg(rxe, "mmap region is larger than the object!\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) goto found_it; } - pr_warn("unable to find pending mmap info\n"); + rxe_dbg(rxe, "unable to find pending mmap info\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -98,7 +98,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - pr_err("err %d from remap_vmalloc_range\n", ret); + rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret); goto done; } -- cgit v1.2.3 From 60da2d11fcbc043304910e4d2ca82f9bab953e63 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 15 Nov 2022 18:07:47 +0100 Subject: RDMA/siw: Set defined status for work completion with undefined status A malicious user may write undefined values into the status or opcode fields of memory-mapped completion queue elements. Undefined status or opcode values will result in out-of-bounds access to an array mapping the siw internal representation of opcode and status to the RDMA core representation when reaping CQ elements. While siw detects those undefined values, it did not correctly set the completion status to a defined value, thus defeating the whole purpose of the check.
This bug leads to the following Smatch static checker warning: drivers/infiniband/sw/siw/siw_cq.c:96 siw_reap_cqe() error: buffer overflow 'map_cqe_status' 10 <= 21 Fixes: bdf1da5df9da ("RDMA/siw: Fix immediate work request flush to completion queue") Link: https://lore.kernel.org/r/20221115170747.1263298-1-bmt@zurich.ibm.com Reported-by: Dan Carpenter Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index acc7bcd538b5..403029de6b92 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -88,9 +88,9 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) if (opcode >= SIW_NUM_OPCODES) { opcode = 0; - status = IB_WC_GENERAL_ERR; + status = SIW_WC_GENERAL_ERR; } else if (status >= SIW_NUM_WC_STATUS) { - status = IB_WC_GENERAL_ERR; + status = SIW_WC_GENERAL_ERR; } wc->opcode = map_wc_opcode[opcode]; wc->status = map_cqe_status[status].ib; -- cgit v1.2.3 From 8e1a76493be9868fef34f977296a69769dcdfa6f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 11 Nov 2022 21:35:37 -0500 Subject: RDMA/rxe: Remove reliable datagram support The rdma_rxe driver does not actually support the reliable datagram transport, but the driver code contains a variable with RD opcodes that is never used. So remove it. Link: https://lore.kernel.org/r/20221112023537.432912-1-yanjun.zhu@intel.com Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_hdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index e432f9e37795..804594b76040 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -742,7 +742,6 @@ enum aeth_syndrome { AETH_NAK_INVALID_REQ = 0x61, AETH_NAK_REM_ACC_ERR = 0x62, AETH_NAK_REM_OP_ERR = 0x63, - AETH_NAK_INV_RD_REQ = 0x64, }; static inline u8 __aeth_syn(void *arg) -- cgit v1.2.3 From 7d984dac8f6bf4ebd3398af82b357e1d181ecaac Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Sun, 30 Oct 2022 03:04:33 +0000 Subject: RDMA/rxe: Fix mr->map double free rxe_mr_cleanup(), which tries to free mr->map again, will be called when rxe_mr_init_user() fails: CPU: 0 PID: 4917 Comm: rdma_flush_serv Kdump: loaded Not tainted 6.1.0-rc1-roce-flush+ #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x45/0x5d panic+0x19e/0x349 end_report.part.0+0x54/0x7c kasan_report.cold+0xa/0xf rxe_mr_cleanup+0x9d/0xf0 [rdma_rxe] __rxe_cleanup+0x10a/0x1e0 [rdma_rxe] rxe_reg_user_mr+0xb7/0xd0 [rdma_rxe] ib_uverbs_reg_mr+0x26a/0x480 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x1a2/0x250 [ib_uverbs] ib_uverbs_cmd_verbs+0x1397/0x15a0 [ib_uverbs] This issue was first exposed by commit b18c7da63fcb ("RDMA/rxe: Fix memory leak in error path code") and fixed in commit 8ff5f5d9d8cf ("RDMA/rxe: Prevent double freeing rxe_map_set()"), but that fix was later reverted by commit 1e75550648da (Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"). Simply let rxe_mr_cleanup() always handle freeing the mr->map once it is successfully allocated.
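The ownership rule this fix establishes is worth spelling out: once mr->map is successfully allocated, only rxe_mr_cleanup() frees it, and a failed partial allocation must free what it made and NULL the pointer so cleanup cannot double-free. A sketch of the resulting error-path idiom, mirroring the diff below:

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);
	kfree(mr->map);
	mr->map = NULL;	/* tells rxe_mr_cleanup() there is nothing to free */
err1:
	return -ENOMEM;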
Fixes: 1e75550648da ("Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"") Link: https://lore.kernel.org/r/1667099073-2-1-git-send-email-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index cd846cf82a84..b1423000e4bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -97,6 +97,7 @@ err2: kfree(mr->map[i]); kfree(mr->map); + mr->map = NULL; err1: return -ENOMEM; } @@ -120,7 +121,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; - int i; umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { @@ -159,9 +159,8 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, if (!vaddr) { rxe_dbg_mr(mr, "Unable to get virtual address\n"); err = -ENOMEM; - goto err_cleanup_map; + goto err_release_umem; } - buf->addr = (uintptr_t)vaddr; buf->size = PAGE_SIZE; num_buf++; @@ -178,10 +177,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, return 0; -err_cleanup_map: - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); - kfree(mr->map); err_release_umem: ib_umem_release(umem); err_out: -- cgit v1.2.3 From cb6562c380832a930ffd1722ac9d479b454aed4e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 22 Nov 2022 15:37:39 -0400 Subject: RDMA/rxe: Do not NULL deref on debugging failure path Correct the mistake, mr is obviously NULL in this code path. Fixes: 2778b72b1df0 ("RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mr.c") Link: https://lore.kernel.org/r/Y3eeJW0AdyJYhYyQ@kili Reported-by: Dan Carpenter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index b1423000e4bc..b7c9ff1ddf0e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -519,7 +519,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - rxe_dbg_mr(mr, "No MR for key %#x\n", key); + rxe_dbg_qp(qp, "No MR for key %#x\n", key); ret = -EINVAL; goto err; } -- cgit v1.2.3 From f67376d801499f4fa0838c18c1efcad8840e550d Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 22 Nov 2022 23:14:37 +0800 Subject: RDMA/rxe: Fix NULL-ptr-deref in rxe_qp_do_cleanup() when socket create failed There is a null-ptr-deref when mount.cifs over rdma: BUG: KASAN: null-ptr-deref in rxe_qp_do_cleanup+0x2f3/0x360 [rdma_rxe] Read of size 8 at addr 0000000000000018 by task mount.cifs/3046 CPU: 2 PID: 3046 Comm: mount.cifs Not tainted 6.1.0-rc5+ #62 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc3 Call Trace: dump_stack_lvl+0x34/0x44 kasan_report+0xad/0x130 rxe_qp_do_cleanup+0x2f3/0x360 [rdma_rxe] execute_in_process_context+0x25/0x90 __rxe_cleanup+0x101/0x1d0 [rdma_rxe] rxe_create_qp+0x16a/0x180 [rdma_rxe] create_qp.part.0+0x27d/0x340 ib_create_qp_kernel+0x73/0x160 rdma_create_qp+0x100/0x230 _smbd_get_connection+0x752/0x20f0 smbd_get_connection+0x21/0x40 cifs_get_tcp_session+0x8ef/0xda0 mount_get_conns+0x60/0x750 cifs_mount+0x103/0xd00 cifs_smb3_do_mount+0x1dd/0xcb0 smb3_get_tree+0x1d5/0x300 vfs_get_tree+0x41/0xf0 
path_mount+0x9b3/0xdd0 __x64_sys_mount+0x190/0x1d0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The root cause of the issue is that the socket create failed in rxe_qp_init_req(), leaving qp->sk NULL. So move the sk_dst_reset() call in rxe_qp_do_cleanup() to after the NULL pointer check. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20221122151437.1057671-1-zhangxiaoxu5@huawei.com Signed-off-by: Zhang Xiaoxu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 46f6c74ce00e..ab72db68b58f 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -817,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work) if (qp->resp.mr) rxe_put(qp->resp.mr); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_reset(qp->sk->sk); - free_rd_atomic_resources(qp); if (qp->sk) { + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); + kernel_sock_shutdown(qp->sk, SHUT_RDWR); sock_release(qp->sk); } -- cgit v1.2.3 From 5c7af6c7938466aa2f3c52057f4dd28b4a1e9e42 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:37:08 +0000 Subject: RDMA/rxe: Extend rxe packet format to support atomic write Extend rxe_wr_opcode_info[] and rxe_opcode[] for the new atomic write opcode. Link: https://lore.kernel.org/r/1669905432-14-5-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_opcode.c | 18 ++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index d4ba4d506f17..fb196029048e 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_QPT_UC] = WR_LOCAL_OP_MASK, }, }, + [IB_WR_ATOMIC_WRITE] = { + .name = "IB_WR_ATOMIC_WRITE", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_WRITE_MASK, + }, + }, }; struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { @@ -378,6 +384,18 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { RXE_IETH_BYTES, } }, + [IB_OPCODE_RC_ATOMIC_WRITE] = { + .name = "IB_OPCODE_RC_ATOMIC_WRITE", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_ATOMIC_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES, + } + }, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = { diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index 8f9aaaf260f2..a470e9b0b884 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -20,6 +20,7 @@ enum rxe_wr_mask { WR_READ_MASK = BIT(3), WR_WRITE_MASK = BIT(4), WR_LOCAL_OP_MASK = BIT(5), + WR_ATOMIC_WRITE_MASK = BIT(7), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, @@ -81,6 +82,8 @@ enum rxe_hdr_mask { RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), + RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14), + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), -- cgit v1.2.3 From
abb633cf28049e6a7c37c44f83a8584f7dbded7d Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:25 +0000 Subject: RDMA/rxe: Make requester support atomic write on RC service Make requester process and send an atomic write request on RC service. Link: https://lore.kernel.org/r/1669905568-62-1-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 4d45f508392f..2713e9058922 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -258,6 +258,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) else return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE : IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_ATOMIC_WRITE: + return IB_OPCODE_RC_ATOMIC_WRITE; + case IB_WR_REG_MR: case IB_WR_LOCAL_INV: return opcode; @@ -486,6 +490,11 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, } } + if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload); + wqe->dma.resid -= payload; + } + return 0; } @@ -709,13 +718,15 @@ int rxe_requester(void *arg) } mask = rxe_opcode[opcode].mask; - if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { + if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK | + RXE_ATOMIC_WRITE_MASK))) { if (check_init_depth(qp, wqe)) goto exit; } mtu = get_mtu(qp); - payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; + payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ? + wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { /* C10-93.1.1: If the total sum of all the buffer lengths specified for a -- cgit v1.2.3 From 034e285f8b99062a0cf29112e1232154a6a44aa5 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:26 +0000 Subject: RDMA/rxe: Make responder support atomic write on RC service Make responder process an atomic write request and send a read response on RC service. 
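For orientation, this is how the new verb surfaces to user space, assuming the rdma-core extended-QP API that accompanied this kernel work (a hedged sketch: qpx is assumed to have been created with IBV_QP_EX_WITH_ATOMIC_WRITE in send_ops_flags, and rkey/remote_addr come from the usual MR exchange):

/* The payload is always exactly 8 bytes and the remote address must
 * be 8-byte aligned; the values here are illustrative.
 */
uint64_t val = 0x0123456789abcdefULL;
int ret;

ibv_wr_start(qpx);
qpx->wr_id = 1;
qpx->wr_flags = IBV_SEND_SIGNALED;
ibv_wr_atomic_write(qpx, rkey, remote_addr, &val);
ret = ibv_wr_complete(qpx);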
Link: https://lore.kernel.org/r/1669905568-62-2-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 84 +++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 6761bcd1d4d8..6ac544477f3f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -22,6 +22,7 @@ enum resp_states { RESPST_EXECUTE, RESPST_READ_REPLY, RESPST_ATOMIC_REPLY, + RESPST_ATOMIC_WRITE_REPLY, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -57,6 +58,7 @@ static char *resp_state_name[] = { [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", + [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -263,7 +265,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, case IB_QPT_RC: if (((pkt->mask & RXE_READ_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || - ((pkt->mask & RXE_WRITE_MASK) && + ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || ((pkt->mask & RXE_ATOMIC_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { @@ -367,7 +369,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, } } - if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { + if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue @@ -438,7 +440,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, enum resp_states state; int access; - if (pkt->mask & RXE_READ_OR_WRITE_MASK) { + if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (pkt->mask & RXE_RETH_MASK) { qp->resp.va = reth_va(pkt); qp->resp.offset = 0; @@ -504,7 +506,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, goto err; } - if (pkt->mask & RXE_WRITE_MASK) { + if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; @@ -604,6 +606,7 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, res->state = rdatm_res_state_new; break; case RXE_ATOMIC_MASK: + case RXE_ATOMIC_WRITE_MASK: res->first_psn = pkt->psn; res->last_psn = pkt->psn; res->cur_psn = pkt->psn; @@ -673,6 +676,55 @@ out: return ret; } +static enum resp_states atomic_write_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 src, *dst; + struct resp_res *res = qp->resp.res; + struct rxe_mr *mr = qp->resp.mr; + int payload = payload_size(pkt); + + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK); + qp->resp.res = res; + } + + if (!res->replay) { +#ifdef CONFIG_64BIT + if (mr->state != RXE_MR_STATE_VALID) + return RESPST_ERR_RKEY_VIOLATION; + + memcpy(&src, payload_addr(pkt), payload); + + dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload); + /* check vaddr is 8 bytes aligned. 
*/ + if (!dst || (uintptr_t)dst & 7) + return RESPST_ERR_MISALIGNED_ATOMIC; + + /* Do atomic write after all prior operations have completed */ + smp_store_release(dst, src); + + /* decrease resp.resid to zero */ + qp->resp.resid -= sizeof(payload); + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; +#else + return RESPST_ERR_UNSUPPORTED_OPCODE; +#endif /* CONFIG_64BIT */ + } + + return RESPST_ACKNOWLEDGE; +} + static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, struct rxe_pkt_info *ack, int opcode, @@ -912,6 +964,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { return RESPST_ATOMIC_REPLY; + } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + return RESPST_ATOMIC_WRITE_REPLY; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -1085,6 +1139,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) return ret; } +static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY, + "RDMA READ response of length zero ACK"); + + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; +} + static enum resp_states acknowledge(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { @@ -1095,6 +1162,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp, send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); + else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) + send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); @@ -1206,7 +1275,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, res->replay = 1; res->cur_psn = pkt->psn; qp->resp.res = res; - rc = RESPST_ATOMIC_REPLY; + rc = pkt->mask & RXE_ATOMIC_MASK ? + RESPST_ATOMIC_REPLY : + RESPST_ATOMIC_WRITE_REPLY; goto out; } @@ -1343,6 +1414,9 @@ int rxe_responder(void *arg) case RESPST_ATOMIC_REPLY: state = atomic_reply(qp, pkt); break; + case RESPST_ATOMIC_WRITE_REPLY: + state = atomic_write_reply(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; -- cgit v1.2.3 From 3aec427bb1499bd3325d2e251edb729a5a8643df Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:27 +0000 Subject: RDMA/rxe: Implement atomic write completion Generate an atomic write completion when the atomic write request has been finished. 
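On the consumer side, a finished atomic write surfaces with the new IB_WC_ATOMIC_WRITE opcode via the wr_to_wc_opcode() mapping added below; a minimal polling sketch (kernel verbs; names such as cq are assumed to be in scope):

struct ib_wc wc;

while (ib_poll_cq(cq, 1, &wc) > 0) {
	if (wc.opcode == IB_WC_ATOMIC_WRITE &&
	    wc.status == IB_WC_SUCCESS)
		pr_debug("atomic write wr_id %llu completed\n", wc.wr_id);
}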
Link: https://lore.kernel.org/r/1669905568-62-3-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 4dca4f8bbb5a..1c525325e271 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -104,6 +104,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; case IB_WR_BIND_MW: return IB_WC_BIND_MW; + case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE; default: return 0xff; @@ -269,6 +270,9 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, if ((syn & AETH_TYPE_MASK) != AETH_ACK) return COMPST_ERROR; + if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE) + return COMPST_WRITE_SEND; + fallthrough; /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) */ -- cgit v1.2.3 From 4cd9f1d320f905e7bc60f030566d15003745ba91 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 1 Dec 2022 14:39:28 +0000 Subject: RDMA/rxe: Enable atomic write capability for rxe device The capability shows that the rxe device supports the atomic write operation. Link: https://lore.kernel.org/r/1669905568-62-4-git-send-email-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 86c7a8bf3cbb..bbc88cd71d95 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -51,7 +51,12 @@ enum rxe_device_param { | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_MEM_WINDOW +#ifdef CONFIG_64BIT + | IB_DEVICE_MEM_WINDOW_TYPE_2B + | IB_DEVICE_ATOMIC_WRITE, +#else | IB_DEVICE_MEM_WINDOW_TYPE_2B, +#endif /* CONFIG_64BIT */ RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + sizeof(struct ib_sge) * RXE_MAX_SGE, -- cgit v1.2.3 From 3282a549cf9b300e2d1b007925ed007ab24e4131 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Fri, 9 Dec 2022 13:59:26 +0900 Subject: RDMA/rxe: Fix oops with zero length reads The commit 686d348476ee ("RDMA/rxe: Remove unnecessary mr testing") causes a kernel crash. If the responder gets a zero-byte RDMA Read request, qp->resp.mr is not set in check_rkey() (see IBA C9-88). The mr is NULL in this case, and a NULL pointer dereference occurs as shown below.
BUG: kernel NULL pointer dereference, address: 0000000000000010 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 2 PID: 3622 Comm: python3 Kdump: loaded Not tainted 6.1.0-rc3+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:__rxe_put+0xc/0x60 [rdma_rxe] Code: cc cc cc 31 f6 e8 64 36 1b d3 41 b8 01 00 00 00 44 89 c0 c3 cc cc cc cc 41 89 c0 eb c1 90 0f 1f 44 00 00 41 54 b8 ff ff ff ff 0f c1 47 10 83 f8 01 74 11 45 31 e4 85 c0 7e 20 44 89 e0 41 5c RSP: 0018:ffffb27bc012ce78 EFLAGS: 00010246 RAX: 00000000ffffffff RBX: ffff9790857b0580 RCX: 0000000000000000 RDX: ffff979080fe145a RSI: 000055560e3e0000 RDI: 0000000000000000 RBP: ffff97909c7dd800 R08: 0000000000000001 R09: e7ce43d97f7bed0f R10: ffff97908b29c300 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff97908b29c300 R15: 0000000000000000 FS: 00007f276f7bd740(0000) GS:ffff9792b5c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000114230002 CR4: 0000000000060ee0 Call Trace: read_reply+0xda/0x310 [rdma_rxe] rxe_responder+0x82d/0xe50 [rdma_rxe] do_task+0x84/0x170 [rdma_rxe] tasklet_action_common.constprop.0+0xa7/0x120 __do_softirq+0xcb/0x2ac do_softirq+0x63/0x90 Support a NULL mr during read_reply() Fixes: 686d348476ee ("RDMA/rxe: Remove unnecessary mr testing") Fixes: b5f9a01fae42 ("RDMA/rxe: Fix mr leak in RESPST_ERR_RNR") Link: https://lore.kernel.org/r/20221209045926.531689-1-matsuda-daisuke@fujitsu.com Link: https://lore.kernel.org/r/20221202145713.13152-1-lizhijian@fujitsu.com Signed-off-by: Daisuke Matsuda Signed-off-by: Li Zhijian Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 12eb85e8d415..0c8bec281937 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -460,7 +460,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; } - /* A zero-byte op is not required to set an addr or rkey. */ + /* A zero-byte op is not required to set an addr or rkey. See C9-88 */ if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { @@ -880,13 +880,15 @@ static enum resp_states read_reply(struct rxe_qp *qp, skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, res->cur_psn, AETH_ACK_UNLIMITED); if (!skb) { - rxe_put(mr); + if (mr) + rxe_put(mr); return RESPST_ERR_RNR; } err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), payload, RXE_FROM_MR_OBJ); - rxe_put(mr); + if (mr) + rxe_put(mr); if (err) { kfree_skb(skb); return RESPST_ERR_RKEY_VIOLATION; -- cgit v1.2.3 From 689c5421bfe0eac65526bd97a466b9590a6aad3c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 8 Dec 2022 15:09:46 -0600 Subject: RDMA/rxe: Fix incorrect responder length checking The code in rxe_resp.c at check_length() is incorrect: it compares pkt->opcode, an 8-bit value, against various mask bits which are all higher than 256, so nothing is ever reported. This patch rewrites the test to compare against pkt->mask, which is correct. However, this now exposes another error. For UD send packets, the value of the pmtu cannot be determined from qp->mtu.
All that is required here is to later check if the payload fits into the posted receive buffer in that case. Fixes: 837a55847ead ("RDMA/rxe: Implement packet length validation on responder") Link: https://lore.kernel.org/r/20221208210945.28607-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Reviewed-by: Daisuke Matsuda Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 62 +++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 26 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 0c8bec281937..abbaa41017e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -392,36 +392,46 @@ static enum resp_states check_resource(struct rxe_qp *qp, return RESPST_CHK_LENGTH; } -static enum resp_states check_length(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { - int mtu = qp->mtu; - u32 payload = payload_size(pkt); - u32 dmalen = reth_len(pkt); - - /* RoCEv2 packets do not have LRH. - * Let's skip checking it. + /* + * See IBA C9-92 + * For UD QPs we only check if the packet will fit in the + * receive buffer later. For rmda operations additional + * length checks are performed in check_rkey. */ - - if ((pkt->opcode & RXE_START_MASK) && (pkt->opcode & RXE_END_MASK)) { - /* "only" packets */ - if (payload > mtu) - return RESPST_ERR_LENGTH; - } else if ((pkt->opcode & RXE_START_MASK) || (pkt->opcode & RXE_MIDDLE_MASK)) { - /* "first" or "middle" packets */ - if (payload != mtu) - return RESPST_ERR_LENGTH; - } else if (pkt->opcode & RXE_END_MASK) { - /* "last" packets */ - if ((payload == 0) || (payload > mtu)) - return RESPST_ERR_LENGTH; + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))) { + unsigned int mtu = qp->mtu; + unsigned int payload = payload_size(pkt); + + if ((pkt->mask & RXE_START_MASK) && (pkt->mask & RXE_END_MASK)) { + if (unlikely(payload > mtu)) { + rxe_dbg_qp(qp, "only packet too long"); + return RESPST_ERR_LENGTH; + } + } else if ((pkt->mask & RXE_START_MASK) || (pkt->mask & RXE_MIDDLE_MASK)) { + if (unlikely(payload != mtu)) { + rxe_dbg_qp(qp, "first or middle packet not mtu"); + return RESPST_ERR_LENGTH; + } + } else if (pkt->mask & RXE_END_MASK) { + if (unlikely((payload == 0) || (payload > mtu))) { + rxe_dbg_qp(qp, "last packet zero or too long"); + return RESPST_ERR_LENGTH; + } + } } - if (pkt->opcode & (RXE_WRITE_MASK | RXE_READ_MASK)) { - if (dmalen > (1 << 31)) + /* See IBA C9-94 */ + if (pkt->mask & RXE_RETH_MASK) { + if (reth_len(pkt) > (1U << 31)) { + rxe_dbg_qp(qp, "dma length too long"); return RESPST_ERR_LENGTH; + } } return RESPST_CHK_RKEY; @@ -1401,7 +1411,7 @@ int rxe_responder(void *arg) state = check_resource(qp, pkt); break; case RESPST_CHK_LENGTH: - state = check_length(qp, pkt); + state = rxe_resp_check_length(qp, pkt); break; case RESPST_CHK_RKEY: state = check_rkey(qp, pkt); -- cgit v1.2.3 From 02ea0a511558c907bde0e01fdebcd4536924d996 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:55 +0800 Subject: RDMA/rxe: Allow registering persistent flag for pmem MR only A memory region can support at most two flush access flags: IB_ACCESS_FLUSH_PERSISTENT and IB_ACCESS_FLUSH_GLOBAL. However, we only allow users to register the persistent flush flag on a pmem MR, since only pmem has the ability to persist data across power cycles.
Registering the persistent access flag on a non-pmem MR is therefore rejected.

Link: https://lore.kernel.org/r/20221206130201.30986-5-lizhijian@fujitsu.com
CC: Dan Williams
Signed-off-by: Li Zhijian
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index b7c9ff1ddf0e..81a438e5010a 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -111,6 +111,15 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
 	mr->ibmr.type = IB_MR_TYPE_DMA;
 }
 
+static bool is_pmem_page(struct page *pg)
+{
+	unsigned long paddr = page_to_phys(pg);
+
+	return REGION_INTERSECTS ==
+	       region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM,
+				 IORES_DESC_PERSISTENT_MEMORY);
+}
+
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr)
 {
@@ -146,16 +155,25 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	num_buf = 0;
 	map = mr->map;
 	if (length > 0) {
-		buf = map[0]->buf;
+		bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT;
 
+		buf = map[0]->buf;
 		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
+			struct page *pg = sg_page_iter_page(&sg_iter);
+
+			if (persistent_access && !is_pmem_page(pg)) {
+				rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n");
+				err = -EINVAL;
+				goto err_release_umem;
+			}
+
 			if (num_buf >= RXE_BUF_PER_MAP) {
 				map++;
 				buf = map[0]->buf;
 				num_buf = 0;
 			}
 
-			vaddr = page_address(sg_page_iter_page(&sg_iter));
+			vaddr = page_address(pg);
 			if (!vaddr) {
 				rxe_dbg_mr(mr, "Unable to get virtual address\n");
 				err = -ENOMEM;
-- cgit v1.2.3

From 02e9a31c897d17981508ceaac4430b93ff56ffc7 Mon Sep 17 00:00:00 2001
From: Li Zhijian
Date: Tue, 6 Dec 2022 21:01:56 +0800
Subject: RDMA/rxe: Extend rxe packet format to support flush

Extend the rxe opcode tables, headers, helpers and constants to support flush operations; the sketch below illustrates the FETH layout these changes define.
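Editor's aside: the FETH carries the flush placement type (PLT) in bits 3:0 and the selectivity level (SEL) in bits 5:4 of a single big-endian 32-bit word. The sketch below is a standalone userspace illustration mirroring the masks and feth_init() helper added in the diff that follows; it is not the kernel code itself, and the packed example values are arbitrary.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define FETH_PLT_MASK	(0x0000000fu)	/* bits 3-0: placement type */
#define FETH_SEL_MASK	(0x00000030u)	/* bits 5-4: selectivity level */
#define FETH_SEL_SHIFT	(4u)

/* Pack type/level the way feth_init() does; FETH is big-endian on the wire. */
static uint32_t feth_pack(uint8_t type, uint8_t level)
{
	uint32_t bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) |
			(type & FETH_PLT_MASK);

	return htonl(bits);
}

int main(void)
{
	uint32_t feth = feth_pack(2, 1);	/* example values only */

	printf("FETH on the wire: %#010x\n", feth);
	printf("PLT=%u SEL=%u\n", ntohl(feth) & FETH_PLT_MASK,
	       (ntohl(feth) & FETH_SEL_MASK) >> FETH_SEL_SHIFT);
	return 0;
}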
Refer to the IBA A19.4.1 for more FETH definition details Link: https://lore.kernel.org/r/20221206130201.30986-6-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_hdr.h | 47 ++++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.c | 17 ++++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.h | 14 ++++++---- 3 files changed, 73 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 804594b76040..46f82b27fcd2 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -607,6 +607,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } +/****************************************************************************** + * FLUSH Extended Transport Header + ******************************************************************************/ + +struct rxe_feth { + __be32 bits; +}; + +#define FETH_PLT_MASK (0x0000000f) /* bits 3-0 */ +#define FETH_SEL_MASK (0x00000030) /* bits 5-4 */ +#define FETH_SEL_SHIFT (4U) + +static inline u32 __feth_plt(void *arg) +{ + struct rxe_feth *feth = arg; + + return be32_to_cpu(feth->bits) & FETH_PLT_MASK; +} + +static inline u32 __feth_sel(void *arg) +{ + struct rxe_feth *feth = arg; + + return (be32_to_cpu(feth->bits) & FETH_SEL_MASK) >> FETH_SEL_SHIFT; +} + +static inline u32 feth_plt(struct rxe_pkt_info *pkt) +{ + return __feth_plt(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); +} + +static inline u32 feth_sel(struct rxe_pkt_info *pkt) +{ + return __feth_sel(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); +} + +static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level) +{ + struct rxe_feth *feth = (struct rxe_feth *) + (pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); + u32 bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) | + (type & FETH_PLT_MASK); + + feth->bits = cpu_to_be32(bits); +} + /****************************************************************************** * Atomic Extended Transport Header ******************************************************************************/ @@ -909,6 +955,7 @@ enum rxe_hdr_length { RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth), RXE_IETH_BYTES = sizeof(struct rxe_ieth), RXE_RDETH_BYTES = sizeof(struct rxe_rdeth), + RXE_FETH_BYTES = sizeof(struct rxe_feth), }; static inline size_t header_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index fb196029048e..5c0d5c6ffda4 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_QPT_UC] = WR_LOCAL_OP_MASK, }, }, + [IB_WR_FLUSH] = { + .name = "IB_WR_FLUSH", + .mask = { + [IB_QPT_RC] = WR_FLUSH_MASK, + }, + }, [IB_WR_ATOMIC_WRITE] = { .name = "IB_WR_ATOMIC_WRITE", .mask = { @@ -384,6 +390,17 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { RXE_IETH_BYTES, } }, + [IB_OPCODE_RC_FLUSH] = { + .name = "IB_OPCODE_RC_FLUSH", + .mask = RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK | + RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK, + .length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_FETH] = RXE_BTH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + RXE_FETH_BYTES, + } + }, [IB_OPCODE_RC_ATOMIC_WRITE] = { .name = 
"IB_OPCODE_RC_ATOMIC_WRITE", .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index a470e9b0b884..cea4e0a63919 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -20,6 +20,7 @@ enum rxe_wr_mask { WR_READ_MASK = BIT(3), WR_WRITE_MASK = BIT(4), WR_LOCAL_OP_MASK = BIT(5), + WR_FLUSH_MASK = BIT(6), WR_ATOMIC_WRITE_MASK = BIT(7), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, @@ -48,6 +49,7 @@ enum rxe_hdr_type { RXE_RDETH, RXE_DETH, RXE_IMMDT, + RXE_FETH, RXE_PAYLOAD, NUM_HDR_TYPES }; @@ -64,6 +66,7 @@ enum rxe_hdr_mask { RXE_IETH_MASK = BIT(RXE_IETH), RXE_RDETH_MASK = BIT(RXE_RDETH), RXE_DETH_MASK = BIT(RXE_DETH), + RXE_FETH_MASK = BIT(RXE_FETH), RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD), RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0), @@ -72,13 +75,14 @@ enum rxe_hdr_mask { RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4), RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), + RXE_FLUSH_MASK = BIT(NUM_HDR_TYPES + 6), - RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6), - RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7), + RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 7), + RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 8), - RXE_START_MASK = BIT(NUM_HDR_TYPES + 8), - RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), - RXE_END_MASK = BIT(NUM_HDR_TYPES + 10), + RXE_START_MASK = BIT(NUM_HDR_TYPES + 9), + RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 10), + RXE_END_MASK = BIT(NUM_HDR_TYPES + 11), RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), -- cgit v1.2.3 From fa1fd682ad3ef35b0e532c3bb14140786d17527c Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:01:57 +0800 Subject: RDMA/rxe: Implement RC RDMA FLUSH service in requester side Implement FLUSH request operation in the requester. Link: https://lore.kernel.org/r/20221206130201.30986-7-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 2713e9058922..899c8779f800 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE : IB_OPCODE_RC_SEND_FIRST; + case IB_WR_FLUSH: + return IB_OPCODE_RC_FLUSH; + case IB_WR_RDMA_READ: return IB_OPCODE_RC_RDMA_READ_REQUEST; @@ -425,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* init optional headers */ if (pkt->mask & RXE_RETH_MASK) { - reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + if (pkt->mask & RXE_FETH_MASK) + reth_set_rkey(pkt, ibwr->wr.flush.rkey); + else + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); reth_set_len(pkt, wqe->dma.resid); } + /* Fill Flush Extension Transport Header */ + if (pkt->mask & RXE_FETH_MASK) + feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level); + if (pkt->mask & RXE_IMMDT_MASK) immdt_set_imm(pkt, ibwr->ex.imm_data); @@ -488,6 +498,9 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, memset(pad, 0, bth_pad(pkt)); } + } else if (pkt->mask & RXE_FLUSH_MASK) { + /* oA19-2: shall have no payload. 
*/
+		wqe->dma.resid = 0;
 	}
 
 	if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
-- cgit v1.2.3

From ea1bb00ee9a5527b032a6efebe4a879db4cb42bb Mon Sep 17 00:00:00 2001
From: Li Zhijian
Date: Tue, 6 Dec 2022 21:01:58 +0800
Subject: RDMA/rxe: Implement flush execution in responder side

Only the requested placement types that are also registered in the destination memory region are acceptable; otherwise, the responder replies with a "Remote Access Error" NAK when it finds a placement-type violation.

We persist data via arch_wb_cache_pmem(), which may be architecture specific.

This commit also adds two helpers to update qp.resp from the incoming packet.

Link: https://lore.kernel.org/r/20221206130201.30986-8-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun
Signed-off-by: Li Zhijian
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |   1 +
 drivers/infiniband/sw/rxe/rxe_mr.c    |  36 ++++++++
 drivers/infiniband/sw/rxe/rxe_resp.c  | 160 +++++++++++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_verbs.h |   6 ++
 4 files changed, 183 insertions(+), 20 deletions(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a22476d27b38..948ce4902b10 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -64,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_dir dir);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 81a438e5010a..072eac4b65d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
  */
 
+#include <linux/libnvdimm.h>
+
 #include "rxe.h"
 #include "rxe_loc.h"
 
@@ -192,6 +194,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	mr->offset = ib_umem_offset(umem);
 	mr->state = RXE_MR_STATE_VALID;
 	mr->ibmr.type = IB_MR_TYPE_USER;
+	mr->ibmr.page_size = PAGE_SIZE;
 
 	return 0;
 
@@ -295,6 +298,39 @@ out:
 	return addr;
 }
 
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length)
+{
+	size_t offset;
+
+	if (length == 0)
+		return 0;
+
+	if (mr->ibmr.type == IB_MR_TYPE_DMA)
+		return -EFAULT;
+
+	offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask;
+	while (length > 0) {
+		u8 *va;
+		int bytes;
+
+		bytes = mr->ibmr.page_size - offset;
+		if (bytes > length)
+			bytes = length;
+
+		va = iova_to_vaddr(mr, iova, length);
+		if (!va)
+			return -EFAULT;
+
+		arch_wb_cache_pmem(va, bytes);
+
+		length -= bytes;
+		iova += bytes;
+		offset = 0;
+	}
+
+	return 0;
+}
+
 /* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mr object starting at iova.
*/ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index abbaa41017e8..7a60c7709da0 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -23,6 +23,7 @@ enum resp_states { RESPST_READ_REPLY, RESPST_ATOMIC_REPLY, RESPST_ATOMIC_WRITE_REPLY, + RESPST_PROCESS_FLUSH, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -59,6 +60,7 @@ static char *resp_state_name[] = { [RESPST_READ_REPLY] = "READ_REPLY", [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", + [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -258,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp, } } +static bool check_qp_attr_access(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + if (((pkt->mask & RXE_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || + ((pkt->mask & RXE_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + return false; + + if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if ((flush_type & IB_FLUSH_GLOBAL && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) || + (flush_type & IB_FLUSH_PERSISTENT && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT))) + return false; + } + + return true; +} + static enum resp_states check_op_valid(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: - if (((pkt->mask & RXE_READ_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || - ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || - ((pkt->mask & RXE_ATOMIC_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + if (!check_qp_attr_access(qp, pkt)) return RESPST_ERR_UNSUPPORTED_OPCODE; - } break; @@ -437,6 +457,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, return RESPST_CHK_RKEY; } +static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = reth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = reth_rkey(pkt); + qp->resp.resid = reth_len(pkt); + qp->resp.length = reth_len(pkt); +} + +static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = atmeth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); +} + static enum resp_states check_rkey(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { @@ -448,23 +485,26 @@ static enum resp_states check_rkey(struct rxe_qp *qp, u32 pktlen; int mtu = qp->mtu; enum resp_states state; - int access; + int access = 0; if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { - if (pkt->mask & RXE_RETH_MASK) { - qp->resp.va = reth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = reth_rkey(pkt); - qp->resp.resid = reth_len(pkt); - qp->resp.length = reth_len(pkt); - } + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + access = (pkt->mask & RXE_READ_MASK) ? 
IB_ACCESS_REMOTE_READ : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + + if (flush_type & IB_FLUSH_GLOBAL) + access |= IB_ACCESS_FLUSH_GLOBAL; + if (flush_type & IB_FLUSH_PERSISTENT) + access |= IB_ACCESS_FLUSH_PERSISTENT; } else if (pkt->mask & RXE_ATOMIC_MASK) { - qp->resp.va = atmeth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = atmeth_rkey(pkt); - qp->resp.resid = sizeof(u64); + qp_resp_from_atmeth(qp, pkt); access = IB_ACCESS_REMOTE_ATOMIC; } else { return RESPST_EXECUTE; @@ -511,11 +551,20 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } } + if (pkt->mask & RXE_FLUSH_MASK) { + /* FLUSH MR may not set va or resid + * no need to check range since we will flush whole mr + */ + if (feth_sel(pkt) == IB_FLUSH_MR) + goto skip_check_range; + } + if (mr_check_range(mr, va + qp->resp.offset, resid)) { state = RESPST_ERR_RKEY_VIOLATION; goto err; } +skip_check_range: if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { @@ -621,11 +670,61 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, res->last_psn = pkt->psn; res->cur_psn = pkt->psn; break; + case RXE_FLUSH_MASK: + res->flush.va = qp->resp.va + qp->resp.offset; + res->flush.length = qp->resp.length; + res->flush.type = feth_plt(pkt); + res->flush.level = feth_sel(pkt); } return res; } +static enum resp_states process_flush(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 length, start; + struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; + + /* oA19-14, oA19-15 */ + if (res && res->replay) + return RESPST_ACKNOWLEDGE; + else if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK); + qp->resp.res = res; + } + + if (res->flush.level == IB_FLUSH_RANGE) { + start = res->flush.va; + length = res->flush.length; + } else { /* level == IB_FLUSH_MR */ + start = mr->ibmr.iova; + length = mr->ibmr.length; + } + + if (res->flush.type & IB_FLUSH_PERSISTENT) { + if (rxe_flush_pmem_iova(mr, start, length)) + return RESPST_ERR_RKEY_VIOLATION; + /* Make data persistent. */ + wmb(); + } else if (res->flush.type & IB_FLUSH_GLOBAL) { + /* Make data global visibility. */ + wmb(); + } + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; +} + /* Guarantee atomicity of atomic operations at the machine level. */ static DEFINE_SPINLOCK(atomic_ops_lock); @@ -980,6 +1079,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ATOMIC_REPLY; } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { return RESPST_ATOMIC_WRITE_REPLY; + } else if (pkt->mask & RXE_FLUSH_MASK) { + return RESPST_PROCESS_FLUSH; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -1176,7 +1277,7 @@ static enum resp_states acknowledge(struct rxe_qp *qp, send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); - else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) + else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK)) send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); @@ -1234,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, /* SEND. 
Ack again and cleanup. C9-105. */
 		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
 		return RESPST_CLEANUP;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		struct resp_res *res;
+
+		/* Find the operation in our list of responder resources. */
+		res = find_resource(qp, pkt->psn);
+		if (res) {
+			res->replay = 1;
+			res->cur_psn = pkt->psn;
+			qp->resp.res = res;
+			rc = RESPST_PROCESS_FLUSH;
+			goto out;
+		}
+
+		/* Resource not found. Class D error. Drop the request. */
+		rc = RESPST_CLEANUP;
+		goto out;
 	} else if (pkt->mask & RXE_READ_MASK) {
 		struct resp_res *res;
@@ -1431,6 +1548,9 @@ int rxe_responder(void *arg)
 		case RESPST_ATOMIC_WRITE_REPLY:
 			state = atomic_write_reply(qp, pkt);
 			break;
+		case RESPST_PROCESS_FLUSH:
+			state = process_flush(qp, pkt);
+			break;
 		case RESPST_ACKNOWLEDGE:
 			state = acknowledge(qp, pkt);
 			break;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 22a299b0a9f0..19ddfa890480 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -165,6 +165,12 @@ struct resp_res {
 			u64 va;
 			u32 resid;
 		} read;
+		struct {
+			u32 length;
+			u64 va;
+			u8 type;
+			u8 level;
+		} flush;
 	};
 };
-- cgit v1.2.3

From 70aad902ce8aeb094dd3ef14988a652f24cce7c8 Mon Sep 17 00:00:00 2001
From: Li Zhijian
Date: Tue, 6 Dec 2022 21:01:59 +0800
Subject: RDMA/rxe: Implement flush completion

Per the IBA spec, a FLUSH is acknowledged with a zero-length RDMA READ response. Use the IB_WC_FLUSH (aka IB_UVERBS_WC_FLUSH) opcode to report a FLUSH completion to userspace.

Link: https://lore.kernel.org/r/20221206130201.30986-9-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun
Signed-off-by: Li Zhijian
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 1c525325e271..20737fec392b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -105,6 +105,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 	case IB_WR_REG_MR:		return IB_WC_REG_MR;
 	case IB_WR_BIND_MW:		return IB_WC_BIND_MW;
 	case IB_WR_ATOMIC_WRITE:	return IB_WC_ATOMIC_WRITE;
+	case IB_WR_FLUSH:		return IB_WC_FLUSH;
 
 	default:
 		return 0xff;
@@ -278,7 +279,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 	 */
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
 		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
-		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+		    wqe->wr.opcode != IB_WR_FLUSH) {
 			wqe->status = IB_WC_FATAL_ERR;
 			return COMPST_ERROR;
 		}
-- cgit v1.2.3

From 124011e6e933bead5852c3f69b32dec43919fe1a Mon Sep 17 00:00:00 2001
From: Li Zhijian
Date: Tue, 6 Dec 2022 21:02:01 +0800
Subject: RDMA/rxe: Enable RDMA FLUSH capability for rxe device

Now we are ready to enable the RDMA FLUSH capability for rxe. It supports the Global Visibility and Persistence placement types; a capability-check sketch follows below.
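Editor's aside: a minimal kernel-side sketch (not part of the series) of how a ULP might gate flush usage on the capability bits the diff below advertises. It assumes the IB_DEVICE_FLUSH_* flags from the kernel flush ABI and a valid struct ib_device pointer; the helper name is hypothetical.

#include <rdma/ib_verbs.h>

/* Hypothetical helper: true if 'dev' advertises both flush placement
 * types, as rxe does once the patch below is applied. */
static bool example_dev_supports_flush(struct ib_device *dev)
{
	u64 caps = dev->attrs.device_cap_flags;

	return (caps & IB_DEVICE_FLUSH_GLOBAL) &&
	       (caps & IB_DEVICE_FLUSH_PERSISTENT);
}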
Link: https://lore.kernel.org/r/20221206130201.30986-11-lizhijian@fujitsu.com Reviewed-by: Zhu Yanjun Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index bbc88cd71d95..a754fc902e3d 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -51,6 +51,8 @@ enum rxe_device_param { | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_MEM_WINDOW + | IB_DEVICE_FLUSH_GLOBAL + | IB_DEVICE_FLUSH_PERSISTENT #ifdef CONFIG_64BIT | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_ATOMIC_WRITE, -- cgit v1.2.3
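Editor's closing aside: end to end, a userspace consumer can now exercise FLUSH through the extended-WR interface that rdma-core grew alongside this kernel series. The sketch below assumes the rdma-core names ibv_wr_flush(), IBV_QP_EX_WITH_FLUSH, IBV_FLUSH_PERSISTENT and IBV_FLUSH_RANGE, and that the QP was created with ibv_create_qp_ex() with that send op enabled; qpx, rkey, remote_addr and len are placeholders, and the helper signature should be verified against your libibverbs headers.

#include <infiniband/verbs.h>

/* Hypothetical helper: request that 'len' bytes at 'remote_addr'
 * (covered by 'rkey') be flushed to persistence on the remote node.
 * Per the series above, completion arrives as a zero-length read
 * response and surfaces to userspace as a flush work completion. */
static int flush_to_persistence(struct ibv_qp_ex *qpx, uint32_t rkey,
				uint64_t remote_addr, size_t len)
{
	ibv_wr_start(qpx);
	qpx->wr_id = 0xf1;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	ibv_wr_flush(qpx, rkey, remote_addr, len,
		     IBV_FLUSH_PERSISTENT, IBV_FLUSH_RANGE);
	return ibv_wr_complete(qpx);
}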