summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 09:27:13 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 09:27:13 -0800
commitab425febda94c7d287ea3433cbd0971771d6aeb4 (patch)
tree66240fae1e9720214afda604635c3f0da0b9ebfa /drivers/infiniband/sw
parent08cdc2157966c07d3f986a097ddaa74cee312751 (diff)
parentdbc94a0fb81771a38733c0e8f2ea8c4fa6934dc1 (diff)
downloadlinux-ab425febda94c7d287ea3433cbd0971771d6aeb4.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Usual size of updates, a new driver, and most of the bulk focusing on rxe: - Usual typos, style, and language updates - Driver updates for mlx5, irdma, siw, rts, srp, hfi1, hns, erdma, mlx4, srp - Lots of RXE updates: * Improve reply error handling for bad MR operations * Code tidying * Debug printing uses common loggers * Remove half implemented RD related stuff * Support IBA's recently defined Atomic Write and Flush operations - erdma support for atomic operations - New driver 'mana' for Ethernet HW available in Azure VMs. This driver only supports DPDK" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (122 commits) IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces RDMA: Add missed netdev_put() for the netdevice_tracker RDMA/rxe: Enable RDMA FLUSH capability for rxe device RDMA/cm: Make QP FLUSHABLE for supported device RDMA/rxe: Implement flush completion RDMA/rxe: Implement flush execution in responder side RDMA/rxe: Implement RC RDMA FLUSH service in requester side RDMA/rxe: Extend rxe packet format to support flush RDMA/rxe: Allow registering persistent flag for pmem MR only RDMA/rxe: Extend rxe user ABI to support flush RDMA: Extend RDMA kernel verbs ABI to support flush RDMA: Extend RDMA user ABI to support flush RDMA/rxe: Fix incorrect responder length checking RDMA/rxe: Fix oops with zero length reads RDMA/mlx5: Remove not-used IB_FLOW_SPEC_IB define RDMA/hns: Fix XRC caps on HIP08 RDMA/hns: Fix error code of CMD RDMA/hns: Fix page size cap from firmware RDMA/hns: Fix PBL page MTR find RDMA/hns: Fix AH attr queried by query_qp ...
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c43
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c47
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h48
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c122
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c23
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c42
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c98
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c50
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c329
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c20
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c52
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c106
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h7
-rw-r--r--drivers/infiniband/sw/siw/siw_cq.c24
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c40
25 files changed, 818 insertions, 361 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 51daac5c4feb..136c2efe3466 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
- pr_err("already configured on %s\n", ndev->name);
+ rxe_dbg(exists, "already configured on %s\n", ndev->name);
err = -EEXIST;
goto err;
}
err = rxe_net_add(ibdev_name, ndev);
if (err) {
- pr_err("failed to add %s\n", ndev->name);
+ rxe_dbg(exists, "failed to add %s\n", ndev->name);
goto err;
}
err:
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 30fbdf3bc76a..ab334900fcc3 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -38,6 +38,25 @@
#define RXE_ROCE_V2_SPORT (0xc000)
+#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \
+ "%s: " fmt, __func__, ##__VA_ARGS__)
+#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \
+ "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device, \
+ "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device, \
+ "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device, \
+ "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device, \
+ "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_cq(cq, fmt, ...) ibdev_dbg((cq)->ibcq.device, \
+ "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device, \
+ "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \
+ "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
+
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 3b05314ca739..889d7adbd455 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
}
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
+static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah)
{
const struct ib_global_route *grh = rdma_ah_read_grh(attr);
struct rxe_port *port;
+ struct rxe_dev *rxe;
+ struct rxe_qp *qp;
+ struct rxe_ah *ah;
int type;
+ if (obj_is_ah) {
+ ah = obj;
+ rxe = to_rdev(ah->ibah.device);
+ } else {
+ qp = obj;
+ rxe = to_rdev(qp->ibqp.device);
+ }
+
port = &rxe->port;
if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
if (grh->sgid_index > port->attr.gid_tbl_len) {
- pr_warn("invalid sgid index = %d\n",
- grh->sgid_index);
+ if (obj_is_ah)
+ rxe_dbg_ah(ah, "invalid sgid index = %d\n",
+ grh->sgid_index);
+ else
+ rxe_dbg_qp(qp, "invalid sgid index = %d\n",
+ grh->sgid_index);
return -EINVAL;
}
type = rdma_gid_attr_network_type(grh->sgid_attr);
if (type < RDMA_NETWORK_IPV4 ||
type > RDMA_NETWORK_IPV6) {
- pr_warn("invalid network type for rdma_rxe = %d\n",
- type);
+ if (obj_is_ah)
+ rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n",
+ type);
+ else
+ rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n",
+ type);
return -EINVAL;
}
}
@@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
return 0;
}
+int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr)
+{
+ return chk_attr(qp, attr, false);
+}
+
+int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr)
+{
+ return chk_attr(ah, attr, true);
+}
+
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr)
{
@@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp)
/* only new user provider or kernel client */
ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
if (!ah) {
- pr_warn("Unable to find AH matching ah_num\n");
+ rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n");
return NULL;
}
if (rxe_ah_pd(ah) != pkt->qp->pd) {
- pr_warn("PDs don't match for AH and QP\n");
+ rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n");
rxe_put(ah);
return NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index fb0c008af78c..20737fec392b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
case IB_WR_BIND_MW: return IB_WC_BIND_MW;
+ case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE;
+ case IB_WR_FLUSH: return IB_WC_FLUSH;
default:
return 0xff;
@@ -114,11 +116,11 @@ void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
- pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+ rxe_dbg_qp(qp, "retransmit timer fired\n");
if (qp->valid) {
qp->comp.timeout = 1;
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
}
}
@@ -132,7 +134,10 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
if (must_sched != 0)
rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
- rxe_run_task(&qp->comp.task, must_sched);
+ if (must_sched)
+ rxe_sched_task(&qp->comp.task);
+ else
+ rxe_run_task(&qp->comp.task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -200,6 +205,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
*/
if (pkt->psn == wqe->last_psn)
return COMPST_COMP_ACK;
+ else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
+ (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
+ qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
+ return COMPST_CHECK_ACK;
else
return COMPST_DONE;
} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@@ -228,6 +237,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+ /* Check NAK code to handle a remote error */
+ if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
+ break;
+
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
/* read retries of partial data may restart from
@@ -258,12 +271,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
+ return COMPST_WRITE_SEND;
+
fallthrough;
/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
*/
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (wqe->wr.opcode != IB_WR_RDMA_READ &&
- wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+ wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+ wqe->wr.opcode != IB_WR_FLUSH) {
wqe->status = IB_WC_FATAL_ERR;
return COMPST_ERROR;
}
@@ -305,7 +322,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
}
return COMPST_ERROR_RETRY;
@@ -323,7 +340,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_ERROR;
default:
- pr_warn("unexpected nak %x\n", syn);
+ rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
wqe->status = IB_WC_REM_OP_ERR;
return COMPST_ERROR;
}
@@ -334,7 +351,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
break;
default:
- pr_warn("unexpected opcode\n");
+ rxe_dbg_qp(qp, "unexpected opcode\n");
}
return COMPST_ERROR;
@@ -452,7 +469,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
}
@@ -466,7 +483,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
}
@@ -512,7 +529,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
}
@@ -587,8 +604,7 @@ int rxe_completer(void *arg)
state = COMPST_GET_ACK;
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- comp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
switch (state) {
case COMPST_GET_ACK:
skb = skb_dequeue(&qp->resp_pkts);
@@ -646,7 +662,7 @@ int rxe_completer(void *arg)
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
state = COMPST_DONE;
@@ -714,7 +730,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
goto done;
@@ -735,8 +751,7 @@ int rxe_completer(void *arg)
* rnr timer has fired
*/
qp->req.wait_for_rnr_timer = 1;
- pr_debug("qp#%d set rnr nak timer\n",
- qp_num(qp));
+ rxe_dbg_qp(qp, "set rnr nak timer\n");
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index b1a0ab3cd4bd..1df186534639 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
int count;
if (cqe <= 0) {
- pr_warn("cqe(%d) <= 0\n", cqe);
+ rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe);
goto err1;
}
if (cqe > rxe->attr.max_cqe) {
- pr_debug("cqe(%d) > max_cqe(%d)\n",
+ rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n",
cqe, rxe->attr.max_cqe);
goto err1;
}
@@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
if (cq) {
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
- pr_debug("cqe(%d) < current # elements in queue (%d)",
+ rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)",
cqe, count);
goto err1;
}
@@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe), type);
if (!cq->queue) {
- pr_warn("unable to create cq\n");
+ rxe_dbg(rxe, "unable to create cq\n");
return -ENOMEM;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index e432f9e37795..46f82b27fcd2 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -608,6 +608,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
}
/******************************************************************************
+ * FLUSH Extended Transport Header
+ ******************************************************************************/
+
+struct rxe_feth {
+ __be32 bits;
+};
+
+#define FETH_PLT_MASK (0x0000000f) /* bits 3-0 */
+#define FETH_SEL_MASK (0x00000030) /* bits 5-4 */
+#define FETH_SEL_SHIFT (4U)
+
+static inline u32 __feth_plt(void *arg)
+{
+ struct rxe_feth *feth = arg;
+
+ return be32_to_cpu(feth->bits) & FETH_PLT_MASK;
+}
+
+static inline u32 __feth_sel(void *arg)
+{
+ struct rxe_feth *feth = arg;
+
+ return (be32_to_cpu(feth->bits) & FETH_SEL_MASK) >> FETH_SEL_SHIFT;
+}
+
+static inline u32 feth_plt(struct rxe_pkt_info *pkt)
+{
+ return __feth_plt(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+}
+
+static inline u32 feth_sel(struct rxe_pkt_info *pkt)
+{
+ return __feth_sel(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+}
+
+static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level)
+{
+ struct rxe_feth *feth = (struct rxe_feth *)
+ (pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+ u32 bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) |
+ (type & FETH_PLT_MASK);
+
+ feth->bits = cpu_to_be32(bits);
+}
+
+/******************************************************************************
* Atomic Extended Transport Header
******************************************************************************/
struct rxe_atmeth {
@@ -742,7 +788,6 @@ enum aeth_syndrome {
AETH_NAK_INVALID_REQ = 0x61,
AETH_NAK_REM_ACC_ERR = 0x62,
AETH_NAK_REM_OP_ERR = 0x63,
- AETH_NAK_INV_RD_REQ = 0x64,
};
static inline u8 __aeth_syn(void *arg)
@@ -910,6 +955,7 @@ enum rxe_hdr_length {
RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),
RXE_IETH_BYTES = sizeof(struct rxe_ieth),
RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),
+ RXE_FETH_BYTES = sizeof(struct rxe_feth),
};
static inline size_t header_size(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 46bb07c5c4df..71bc2c189588 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
- pr_warn("failed to init crc32 algorithm err:%ld\n",
+ rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n",
PTR_ERR(tfm));
return PTR_ERR(tfm);
}
@@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
*(__be32 *)shash_desc_ctx(shash) = crc;
err = crypto_shash_update(shash, next, len);
if (unlikely(err)) {
- pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+ rxe_dbg(rxe, "failed crc calculation, err: %d\n", err);
return (__force __be32)crc32_le((__force u32)crc, next, len);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c2a5c8814a48..948ce4902b10 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -9,16 +9,12 @@
/* rxe_av.c */
void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
-
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
-
+int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr);
+int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr);
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr);
-
void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
-
void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
-
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
/* rxe_cq.c */
@@ -68,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr);
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length);
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
enum rxe_mr_copy_dir dir);
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 9149b6095429..a47d72dbc537 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
/* Don't allow a mmap larger than the object. */
if (size > ip->info.size) {
- pr_err("mmap region is larger than the object!\n");
+ rxe_dbg(rxe, "mmap region is larger than the object!\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
goto found_it;
}
- pr_warn("unable to find pending mmap info\n");
+ rxe_dbg(rxe, "unable to find pending mmap info\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@@ -98,7 +98,7 @@ found_it:
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret) {
- pr_err("err %d from remap_vmalloc_range\n", ret);
+ rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret);
goto done;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 502e9ada99b3..072eac4b65d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -4,6 +4,8 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
+#include <linux/libnvdimm.h>
+
#include "rxe.h"
#include "rxe_loc.h"
@@ -26,7 +28,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
- switch (mr->type) {
+ switch (mr->ibmr.type) {
case IB_MR_TYPE_DMA:
return 0;
@@ -38,8 +40,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
return 0;
default:
- pr_warn("%s: mr type (%d) not supported\n",
- __func__, mr->type);
+ rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type);
return -EFAULT;
}
}
@@ -62,7 +63,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
mr->rkey = mr->ibmr.rkey = rkey;
mr->state = RXE_MR_STATE_INVALID;
- mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
@@ -99,6 +99,7 @@ err2:
kfree(mr->map[i]);
kfree(mr->map);
+ mr->map = NULL;
err1:
return -ENOMEM;
}
@@ -109,7 +110,16 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
mr->access = access;
mr->state = RXE_MR_STATE_VALID;
- mr->type = IB_MR_TYPE_DMA;
+ mr->ibmr.type = IB_MR_TYPE_DMA;
+}
+
+static bool is_pmem_page(struct page *pg)
+{
+ unsigned long paddr = page_to_phys(pg);
+
+ return REGION_INTERSECTS ==
+ region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM,
+ IORES_DESC_PERSISTENT_MEMORY);
}
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@@ -122,12 +132,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int num_buf;
void *vaddr;
int err;
- int i;
umem = ib_umem_get(&rxe->ib_dev, start, length, access);
if (IS_ERR(umem)) {
- pr_warn("%s: Unable to pin memory region err = %d\n",
- __func__, (int)PTR_ERR(umem));
+ rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
+ (int)PTR_ERR(umem));
err = PTR_ERR(umem);
goto err_out;
}
@@ -138,8 +147,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
err = rxe_mr_alloc(mr, num_buf);
if (err) {
- pr_warn("%s: Unable to allocate memory for map\n",
- __func__);
+ rxe_dbg_mr(mr, "Unable to allocate memory for map\n");
goto err_release_umem;
}
@@ -149,23 +157,30 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
num_buf = 0;
map = mr->map;
if (length > 0) {
- buf = map[0]->buf;
+ bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT;
+ buf = map[0]->buf;
for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
+ struct page *pg = sg_page_iter_page(&sg_iter);
+
+ if (persistent_access && !is_pmem_page(pg)) {
+ rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n");
+ err = -EINVAL;
+ goto err_release_umem;
+ }
+
if (num_buf >= RXE_BUF_PER_MAP) {
map++;
buf = map[0]->buf;
num_buf = 0;
}
- vaddr = page_address(sg_page_iter_page(&sg_iter));
+ vaddr = page_address(pg);
if (!vaddr) {
- pr_warn("%s: Unable to get virtual address\n",
- __func__);
+ rxe_dbg_mr(mr, "Unable to get virtual address\n");
err = -ENOMEM;
- goto err_cleanup_map;
+ goto err_release_umem;
}
-
buf->addr = (uintptr_t)vaddr;
buf->size = PAGE_SIZE;
num_buf++;
@@ -178,14 +193,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
mr->access = access;
mr->offset = ib_umem_offset(umem);
mr->state = RXE_MR_STATE_VALID;
- mr->type = IB_MR_TYPE_USER;
+ mr->ibmr.type = IB_MR_TYPE_USER;
+ mr->ibmr.page_size = PAGE_SIZE;
return 0;
-err_cleanup_map:
- for (i = 0; i < mr->num_map; i++)
- kfree(mr->map[i]);
- kfree(mr->map);
err_release_umem:
ib_umem_release(umem);
err_out:
@@ -205,7 +217,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
mr->max_buf = max_pages;
mr->state = RXE_MR_STATE_FREE;
- mr->type = IB_MR_TYPE_MEM_REG;
+ mr->ibmr.type = IB_MR_TYPE_MEM_REG;
return 0;
@@ -256,7 +268,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
void *addr;
if (mr->state != RXE_MR_STATE_VALID) {
- pr_warn("mr not in valid state\n");
+ rxe_dbg_mr(mr, "Not in valid state\n");
addr = NULL;
goto out;
}
@@ -267,7 +279,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
}
if (mr_check_range(mr, iova, length)) {
- pr_warn("range violation\n");
+ rxe_dbg_mr(mr, "Range violation\n");
addr = NULL;
goto out;
}
@@ -275,7 +287,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
lookup_iova(mr, iova, &m, &n, &offset);
if (offset + length > mr->map[m]->buf[n].size) {
- pr_warn("crosses page boundary\n");
+ rxe_dbg_mr(mr, "Crosses page boundary\n");
addr = NULL;
goto out;
}
@@ -286,6 +298,39 @@ out:
return addr;
}
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length)
+{
+ size_t offset;
+
+ if (length == 0)
+ return 0;
+
+ if (mr->ibmr.type == IB_MR_TYPE_DMA)
+ return -EFAULT;
+
+ offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask;
+ while (length > 0) {
+ u8 *va;
+ int bytes;
+
+ bytes = mr->ibmr.page_size - offset;
+ if (bytes > length)
+ bytes = length;
+
+ va = iova_to_vaddr(mr, iova, length);
+ if (!va)
+ return -EFAULT;
+
+ arch_wb_cache_pmem(va, bytes);
+
+ length -= bytes;
+ iova += bytes;
+ offset = 0;
+ }
+
+ return 0;
+}
+
/* copy data from a range (vaddr, vaddr+length-1) to or from
* a mr object starting at iova.
*/
@@ -304,7 +349,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
if (length == 0)
return 0;
- if (mr->type == IB_MR_TYPE_DMA) {
+ if (mr->ibmr.type == IB_MR_TYPE_DMA) {
u8 *src, *dest;
src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
@@ -511,7 +556,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
(type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
- mr_pd(mr) != pd || (access && !(access & mr->access)) ||
+ mr_pd(mr) != pd || ((access & mr->access) != access) ||
mr->state != RXE_MR_STATE_VALID)) {
rxe_put(mr);
mr = NULL;
@@ -528,27 +573,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
if (!mr) {
- pr_err("%s: No MR for key %#x\n", __func__, key);
+ rxe_dbg_qp(qp, "No MR for key %#x\n", key);
ret = -EINVAL;
goto err;
}
if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
- pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
- __func__, key, (mr->rkey ? mr->rkey : mr->lkey));
+ rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
+ key, (mr->rkey ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
if (atomic_read(&mr->num_mw) > 0) {
- pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
- __func__);
+ rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
ret = -EINVAL;
goto err_drop_ref;
}
- if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
- pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
+ if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
+ rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
ret = -EINVAL;
goto err_drop_ref;
}
@@ -577,22 +621,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
/* user can only register MR in free state */
if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
- pr_warn("%s: mr->lkey = 0x%x not free\n",
- __func__, mr->lkey);
+ rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
return -EINVAL;
}
/* user can only register mr with qp in same protection domain */
if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
- pr_warn("%s: qp->pd and mr->pd don't match\n",
- __func__);
+ rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
return -EINVAL;
}
/* user is only allowed to change key portion of l/rkey */
if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
- pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
- __func__, key, mr->lkey);
+ rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
+ key, mr->lkey);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 902b7df7aaed..afa5ce1a7116 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
{
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a type 1 MW not in the valid state\n");
return -EINVAL;
}
/* o10-36.2.2 */
if (unlikely((mw->access & IB_ZERO_BASED))) {
- pr_err_once("attempt to bind a zero based type 1 MW\n");
+ rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
return -EINVAL;
}
}
@@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (mw->ibmw.type == IB_MW_TYPE_2) {
/* o10-37.2.30 */
if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a type 2 MW not in the free state\n");
return -EINVAL;
}
/* C10-72 */
if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind type 2 MW with qp with different PD\n");
return -EINVAL;
}
/* o10-37.2.40 */
if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
return -EINVAL;
}
@@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
return 0;
if (unlikely(mr->access & IB_ZERO_BASED)) {
- pr_err_once("attempt to bind MW to zero based MR\n");
+ rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n");
return -EINVAL;
}
/* C10-73 */
if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind an MW to an MR without bind access\n");
return -EINVAL;
}
@@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (unlikely((mw->access &
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind an Writable MW to an MR without local write access\n");
return -EINVAL;
}
@@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
@@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) ||
((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
(mr->ibmr.iova + mr->ibmr.length)))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
}
@@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd ||
(mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
- (mw->length == 0) ||
- (access && !(access & mw->access)) ||
+ (mw->length == 0) || ((access & mw->access) != access) ||
mw->state != RXE_MW_STATE_VALID)) {
rxe_put(mw);
return NULL;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 35f327b9d4b8..e02e1624bcf4 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -20,9 +20,10 @@
static struct rxe_recv_sockets recv_sockets;
-static struct dst_entry *rxe_find_route4(struct net_device *ndev,
- struct in_addr *saddr,
- struct in_addr *daddr)
+static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
+ struct net_device *ndev,
+ struct in_addr *saddr,
+ struct in_addr *daddr)
{
struct rtable *rt;
struct flowi4 fl = { { 0 } };
@@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
rt = ip_route_output_key(&init_net, &fl);
if (IS_ERR(rt)) {
- pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
+ rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr);
return NULL;
}
@@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+ struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
@@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
recv_sockets.sk6->sk, &fl6,
NULL);
if (IS_ERR(ndst)) {
- pr_err_ratelimited("no route to %pI6\n", daddr);
+ rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
return NULL;
}
if (unlikely(ndst->error)) {
- pr_err("no route to %pI6\n", daddr);
+ rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
goto put;
}
@@ -77,7 +79,8 @@ put:
#else
-static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+ struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
@@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(ndev, saddr, daddr);
+ dst = rxe_find_route4(qp, ndev, saddr, daddr);
} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
struct in6_addr *saddr6;
struct in6_addr *daddr6;
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(ndev, saddr6, daddr6);
+ dst = rxe_find_route6(qp, ndev, saddr6, daddr6);
#if IS_ENABLED(CONFIG_IPV6)
if (dst)
qp->dst_cookie =
@@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
- pr_err("Host not reachable\n");
+ rxe_dbg_qp(qp, "Host not reachable\n");
return -EHOSTUNREACH;
}
@@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
- pr_err("Host not reachable\n");
+ rxe_dbg_qp(qp, "Host not reachable\n");
return -EHOSTUNREACH;
}
@@ -345,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
if (unlikely(qp->need_req_skb &&
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
rxe_put(qp);
}
@@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
} else {
- pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
+ rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
+ skb->protocol);
atomic_dec(&pkt->qp->skb_out);
rxe_put(pkt->qp);
kfree_skb(skb);
@@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
}
if (unlikely(net_xmit_eval(err))) {
- pr_debug("error sending packet: %d\n", err);
+ rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
return -EAGAIN;
}
@@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((is_request && (qp->req.state != QP_STATE_READY)) ||
(!is_request && (qp->resp.state != QP_STATE_READY))) {
- pr_info("Packet dropped. QP is not in ready state\n");
+ rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
@@ -429,7 +433,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
}
rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
@@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_port_down(rxe);
break;
case NETDEV_CHANGEMTU:
- pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu);
+ rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_CHANGE:
@@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
default:
- pr_info("ignoring netdev event = %ld for %s\n",
+ rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n",
event, ndev->name);
break;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index d4ba4d506f17..5c0d5c6ffda4 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -101,6 +101,18 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_QPT_UC] = WR_LOCAL_OP_MASK,
},
},
+ [IB_WR_FLUSH] = {
+ .name = "IB_WR_FLUSH",
+ .mask = {
+ [IB_QPT_RC] = WR_FLUSH_MASK,
+ },
+ },
+ [IB_WR_ATOMIC_WRITE] = {
+ .name = "IB_WR_ATOMIC_WRITE",
+ .mask = {
+ [IB_QPT_RC] = WR_ATOMIC_WRITE_MASK,
+ },
+ },
};
struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
@@ -378,6 +390,29 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
RXE_IETH_BYTES,
}
},
+ [IB_OPCODE_RC_FLUSH] = {
+ .name = "IB_OPCODE_RC_FLUSH",
+ .mask = RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK |
+ RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK,
+ .length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_FETH] = RXE_BTH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES + RXE_FETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_ATOMIC_WRITE] = {
+ .name = "IB_OPCODE_RC_ATOMIC_WRITE",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_ATOMIC_WRITE_MASK | RXE_START_MASK |
+ RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ }
+ },
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = {
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 8f9aaaf260f2..cea4e0a63919 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -20,6 +20,8 @@ enum rxe_wr_mask {
WR_READ_MASK = BIT(3),
WR_WRITE_MASK = BIT(4),
WR_LOCAL_OP_MASK = BIT(5),
+ WR_FLUSH_MASK = BIT(6),
+ WR_ATOMIC_WRITE_MASK = BIT(7),
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
@@ -47,6 +49,7 @@ enum rxe_hdr_type {
RXE_RDETH,
RXE_DETH,
RXE_IMMDT,
+ RXE_FETH,
RXE_PAYLOAD,
NUM_HDR_TYPES
};
@@ -63,6 +66,7 @@ enum rxe_hdr_mask {
RXE_IETH_MASK = BIT(RXE_IETH),
RXE_RDETH_MASK = BIT(RXE_RDETH),
RXE_DETH_MASK = BIT(RXE_DETH),
+ RXE_FETH_MASK = BIT(RXE_FETH),
RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),
RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),
@@ -71,16 +75,19 @@ enum rxe_hdr_mask {
RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),
RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),
RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
+ RXE_FLUSH_MASK = BIT(NUM_HDR_TYPES + 6),
- RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),
- RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),
+ RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 7),
+ RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 8),
- RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),
- RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
- RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),
+ RXE_START_MASK = BIT(NUM_HDR_TYPES + 9),
+ RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 10),
+ RXE_END_MASK = BIT(NUM_HDR_TYPES + 11),
RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
+ RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14),
+
RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 86c7a8bf3cbb..a754fc902e3d 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -51,7 +51,14 @@ enum rxe_device_param {
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
| IB_DEVICE_MEM_WINDOW
+ | IB_DEVICE_FLUSH_GLOBAL
+ | IB_DEVICE_FLUSH_PERSISTENT
+#ifdef CONFIG_64BIT
+ | IB_DEVICE_MEM_WINDOW_TYPE_2B
+ | IB_DEVICE_ATOMIC_WRITE,
+#else
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
+#endif /* CONFIG_64BIT */
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
sizeof(struct ib_sge) * RXE_MAX_SGE,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a62bab88415c..ab72db68b58f 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
if (cap->max_send_wr > rxe->attr.max_qp_wr) {
- pr_debug("invalid send wr = %u > %d\n",
+ rxe_dbg(rxe, "invalid send wr = %u > %d\n",
cap->max_send_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_send_sge > rxe->attr.max_send_sge) {
- pr_debug("invalid send sge = %u > %d\n",
+ rxe_dbg(rxe, "invalid send sge = %u > %d\n",
cap->max_send_sge, rxe->attr.max_send_sge);
goto err1;
}
if (!has_srq) {
if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
- pr_debug("invalid recv wr = %u > %d\n",
+ rxe_dbg(rxe, "invalid recv wr = %u > %d\n",
cap->max_recv_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
- pr_debug("invalid recv sge = %u > %d\n",
+ rxe_dbg(rxe, "invalid recv sge = %u > %d\n",
cap->max_recv_sge, rxe->attr.max_recv_sge);
goto err1;
}
}
if (cap->max_inline_data > rxe->max_inline_data) {
- pr_debug("invalid max inline data = %u > %d\n",
+ rxe_dbg(rxe, "invalid max inline data = %u > %d\n",
cap->max_inline_data, rxe->max_inline_data);
goto err1;
}
@@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
}
if (!init->recv_cq || !init->send_cq) {
- pr_debug("missing cq\n");
+ rxe_dbg(rxe, "missing cq\n");
goto err1;
}
@@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
- pr_debug("invalid port = %d\n", port_num);
+ rxe_dbg(rxe, "invalid port = %d\n", port_num);
goto err1;
}
port = &rxe->port;
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
- pr_debug("GSI QP exists for port %d\n", port_num);
+ rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num);
goto err1;
}
}
@@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->state_lock);
- spin_lock_init(&qp->req.task.state_lock);
- spin_lock_init(&qp->resp.task.state_lock);
- spin_lock_init(&qp->comp.task.state_lock);
-
spin_lock_init(&qp->sq.sq_lock);
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
@@ -242,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_head_init(&qp->req_pkts);
- rxe_init_task(&qp->req.task, qp,
- rxe_requester, "req");
- rxe_init_task(&qp->comp.task, qp,
- rxe_completer, "comp");
+ rxe_init_task(&qp->req.task, qp, rxe_requester);
+ rxe_init_task(&qp->comp.task, qp, rxe_completer);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
@@ -270,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
wqe_size = rcv_wqe_size(qp->rq.max_sge);
- pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
- qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
-
type = QUEUE_TYPE_FROM_CLIENT;
qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
wqe_size, type);
@@ -292,8 +283,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_head_init(&qp->resp_pkts);
- rxe_init_task(&qp->resp.task, qp,
- rxe_responder, "resp");
+ rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
@@ -402,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
- pr_debug("invalid mask or state for qp\n");
+ rxe_dbg_qp(qp, "invalid mask or state\n");
goto err1;
}
@@ -416,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_PORT) {
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
- pr_debug("invalid port %d\n", attr->port_num);
+ rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
goto err1;
}
}
@@ -424,18 +414,18 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
goto err1;
- if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
+ if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
goto err1;
if (mask & IB_QP_ALT_PATH) {
- if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
+ if (rxe_av_chk_attr(qp, &attr->alt_ah_attr))
goto err1;
if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
- pr_debug("invalid alt port %d\n", attr->alt_port_num);
+ rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
if (attr->alt_timeout > 31) {
- pr_debug("invalid QP alt timeout %d > 31\n",
+ rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n",
attr->alt_timeout);
goto err1;
}
@@ -448,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
enum ib_mtu mtu = attr->path_mtu;
if (mtu > max_mtu) {
- pr_debug("invalid mtu (%d) > (%d)\n",
+ rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n",
ib_mtu_enum_to_int(mtu),
ib_mtu_enum_to_int(max_mtu));
goto err1;
@@ -457,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
- pr_debug("invalid max_rd_atomic %d > %d\n",
+ rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n",
attr->max_rd_atomic,
rxe->attr.max_qp_rd_atom);
goto err1;
@@ -466,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_TIMEOUT) {
if (attr->timeout > 31) {
- pr_debug("invalid QP timeout %d > 31\n", attr->timeout);
+ rxe_dbg_qp(qp, "invalid timeout %d > 31\n",
+ attr->timeout);
goto err1;
}
}
@@ -543,10 +534,10 @@ static void rxe_qp_drain(struct rxe_qp *qp)
if (qp->req.state != QP_STATE_DRAINED) {
qp->req.state = QP_STATE_DRAIN;
if (qp_type(qp) == IB_QPT_RC)
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
else
__rxe_do_task(&qp->comp.task);
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
}
}
@@ -560,13 +551,13 @@ void rxe_qp_error(struct rxe_qp *qp)
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
- rxe_run_task(&qp->resp.task, 1);
+ rxe_sched_task(&qp->resp.task);
if (qp_type(qp) == IB_QPT_RC)
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
else
__rxe_do_task(&qp->comp.task);
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
/* called by the modify qp verb */
@@ -644,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_RETRY_CNT) {
qp->attr.retry_cnt = attr->retry_cnt;
qp->comp.retry_cnt = attr->retry_cnt;
- pr_debug("qp#%d set retry count = %d\n", qp_num(qp),
- attr->retry_cnt);
+ rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt);
}
if (mask & IB_QP_RNR_RETRY) {
qp->attr.rnr_retry = attr->rnr_retry;
qp->comp.rnr_retry = attr->rnr_retry;
- pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp),
- attr->rnr_retry);
+ rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry);
}
if (mask & IB_QP_RQ_PSN) {
qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
qp->resp.psn = qp->attr.rq_psn;
- pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp),
- qp->resp.psn);
+ rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn);
}
if (mask & IB_QP_MIN_RNR_TIMER) {
qp->attr.min_rnr_timer = attr->min_rnr_timer;
- pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp),
+ rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n",
attr->min_rnr_timer);
}
@@ -672,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
qp->req.psn = qp->attr.sq_psn;
qp->comp.psn = qp->attr.sq_psn;
- pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn);
+ rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn);
}
if (mask & IB_QP_PATH_MIG_STATE)
@@ -686,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
switch (attr->qp_state) {
case IB_QPS_RESET:
- pr_debug("qp#%d state -> RESET\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> RESET\n");
rxe_qp_reset(qp);
break;
case IB_QPS_INIT:
- pr_debug("qp#%d state -> INIT\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> INIT\n");
qp->req.state = QP_STATE_INIT;
qp->resp.state = QP_STATE_INIT;
qp->comp.state = QP_STATE_INIT;
break;
case IB_QPS_RTR:
- pr_debug("qp#%d state -> RTR\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> RTR\n");
qp->resp.state = QP_STATE_READY;
break;
case IB_QPS_RTS:
- pr_debug("qp#%d state -> RTS\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> RTS\n");
qp->req.state = QP_STATE_READY;
qp->comp.state = QP_STATE_READY;
break;
case IB_QPS_SQD:
- pr_debug("qp#%d state -> SQD\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> SQD\n");
rxe_qp_drain(qp);
break;
case IB_QPS_SQE:
- pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> SQE !!?\n");
/* Not possible from modify_qp. */
break;
case IB_QPS_ERR:
- pr_debug("qp#%d state -> ERR\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> ERR\n");
rxe_qp_error(qp);
break;
}
@@ -759,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
attr->sq_draining = 0;
}
- pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
+ rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
return 0;
}
@@ -771,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
* will fail immediately.
*/
if (atomic_read(&qp->mcg_num)) {
- pr_debug("Attempt to destroy QP while attached to multicast group\n");
+ rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n");
return -EBUSY;
}
@@ -829,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->resp.mr)
rxe_put(qp->resp.mr);
- if (qp_type(qp) == IB_QPT_RC)
- sk_dst_reset(qp->sk->sk);
-
free_rd_atomic_resources(qp);
if (qp->sk) {
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_reset(qp->sk->sk);
+
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
sock_release(qp->sk);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index f63771207970..899c8779f800 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -100,12 +100,12 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
- pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+ rxe_dbg_qp(qp, "nak timer fired\n");
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
@@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
IB_OPCODE_RC_SEND_FIRST;
+ case IB_WR_FLUSH:
+ return IB_OPCODE_RC_FLUSH;
+
case IB_WR_RDMA_READ:
return IB_OPCODE_RC_RDMA_READ_REQUEST;
@@ -258,6 +261,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
else
return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
IB_OPCODE_RC_SEND_FIRST;
+
+ case IB_WR_ATOMIC_WRITE:
+ return IB_OPCODE_RC_ATOMIC_WRITE;
+
case IB_WR_REG_MR:
case IB_WR_LOCAL_INV:
return opcode;
@@ -421,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
/* init optional headers */
if (pkt->mask & RXE_RETH_MASK) {
- reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
+ if (pkt->mask & RXE_FETH_MASK)
+ reth_set_rkey(pkt, ibwr->wr.flush.rkey);
+ else
+ reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
reth_set_len(pkt, wqe->dma.resid);
}
+ /* Fill Flush Extension Transport Header */
+ if (pkt->mask & RXE_FETH_MASK)
+ feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level);
+
if (pkt->mask & RXE_IMMDT_MASK)
immdt_set_imm(pkt, ibwr->ex.imm_data);
@@ -484,6 +498,14 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
memset(pad, 0, bth_pad(pkt));
}
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ /* oA19-2: shall have no payload. */
+ wqe->dma.resid = 0;
+ }
+
+ if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload);
+ wqe->dma.resid -= payload;
}
return 0;
@@ -595,7 +617,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
}
break;
default:
- pr_err("Unexpected send wqe opcode %d\n", opcode);
+ rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode);
wqe->status = IB_WC_LOC_QP_OP_ERR;
return -EINVAL;
}
@@ -608,7 +630,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
* which can lead to a deadlock. So go ahead and complete
* it now.
*/
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
return 0;
}
@@ -709,13 +731,15 @@ int rxe_requester(void *arg)
}
mask = rxe_opcode[opcode].mask;
- if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
+ if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
+ RXE_ATOMIC_WRITE_MASK))) {
if (check_init_depth(qp, wqe))
goto exit;
}
mtu = get_mtu(qp);
- payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
+ payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
+ wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
@@ -733,7 +757,7 @@ int rxe_requester(void *arg)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- rxe_run_task(&qp->comp.task, 0);
+ rxe_run_task(&qp->comp.task);
goto done;
}
payload = mtu;
@@ -748,14 +772,14 @@ int rxe_requester(void *arg)
av = rxe_get_av(&pkt, &ah);
if (unlikely(!av)) {
- pr_err("qp#%d Failed no address vector\n", qp_num(qp));
+ rxe_dbg_qp(qp, "Failed no address vector\n");
wqe->status = IB_WC_LOC_QP_OP_ERR;
goto err;
}
skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
- pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
+ rxe_dbg_qp(qp, "Failed allocating skb\n");
wqe->status = IB_WC_LOC_QP_OP_ERR;
if (ah)
rxe_put(ah);
@@ -764,7 +788,7 @@ int rxe_requester(void *arg)
err = finish_packet(qp, av, wqe, &pkt, skb, payload);
if (unlikely(err)) {
- pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
+ rxe_dbg_qp(qp, "Error during finish packet\n");
if (err == -EFAULT)
wqe->status = IB_WC_LOC_PROT_ERR;
else
@@ -795,7 +819,7 @@ int rxe_requester(void *arg)
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (err == -EAGAIN) {
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
goto exit;
}
@@ -817,7 +841,7 @@ err:
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
qp->req.state = QP_STATE_ERROR;
- rxe_run_task(&qp->comp.task, 0);
+ rxe_run_task(&qp->comp.task);
exit:
ret = -EAGAIN;
out:
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 693081e813ec..7a60c7709da0 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -22,6 +22,8 @@ enum resp_states {
RESPST_EXECUTE,
RESPST_READ_REPLY,
RESPST_ATOMIC_REPLY,
+ RESPST_ATOMIC_WRITE_REPLY,
+ RESPST_PROCESS_FLUSH,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -57,6 +59,8 @@ static char *resp_state_name[] = {
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
+ [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
+ [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -91,7 +95,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
(skb_queue_len(&qp->req_pkts) > 1);
- rxe_run_task(&qp->resp.task, must_sched);
+ if (must_sched)
+ rxe_sched_task(&qp->resp.task);
+ else
+ rxe_run_task(&qp->resp.task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
@@ -253,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
}
}
+static bool check_qp_attr_access(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ if (((pkt->mask & RXE_READ_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+ ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+ ((pkt->mask & RXE_ATOMIC_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ return false;
+
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if ((flush_type & IB_FLUSH_GLOBAL &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
+ (flush_type & IB_FLUSH_PERSISTENT &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
+ return false;
+ }
+
+ return true;
+}
+
static enum resp_states check_op_valid(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (((pkt->mask & RXE_READ_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
- ((pkt->mask & RXE_WRITE_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
- ((pkt->mask & RXE_ATOMIC_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+ if (!check_qp_attr_access(qp, pkt))
return RESPST_ERR_UNSUPPORTED_OPCODE;
- }
break;
@@ -314,7 +339,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
- pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
+ rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
return RESPST_ERR_MALFORMED_WQE;
}
size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
@@ -364,7 +389,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
}
}
- if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
+ if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@@ -387,19 +412,66 @@ static enum resp_states check_resource(struct rxe_qp *qp,
return RESPST_CHK_LENGTH;
}
-static enum resp_states check_length(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
- switch (qp_type(qp)) {
- case IB_QPT_RC:
- return RESPST_CHK_RKEY;
-
- case IB_QPT_UC:
- return RESPST_CHK_RKEY;
+ /*
+ * See IBA C9-92
+ * For UD QPs we only check if the packet will fit in the
+ * receive buffer later. For rmda operations additional
+ * length checks are performed in check_rkey.
+ */
+ if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
+ (qp_type(qp) == IB_QPT_UC))) {
+ unsigned int mtu = qp->mtu;
+ unsigned int payload = payload_size(pkt);
+
+ if ((pkt->mask & RXE_START_MASK) &&
+ (pkt->mask & RXE_END_MASK)) {
+ if (unlikely(payload > mtu)) {
+ rxe_dbg_qp(qp, "only packet too long");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if ((pkt->mask & RXE_START_MASK) ||
+ (pkt->mask & RXE_MIDDLE_MASK)) {
+ if (unlikely(payload != mtu)) {
+ rxe_dbg_qp(qp, "first or middle packet not mtu");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if (pkt->mask & RXE_END_MASK) {
+ if (unlikely((payload == 0) || (payload > mtu))) {
+ rxe_dbg_qp(qp, "last packet zero or too long");
+ return RESPST_ERR_LENGTH;
+ }
+ }
+ }
- default:
- return RESPST_CHK_RKEY;
+ /* See IBA C9-94 */
+ if (pkt->mask & RXE_RETH_MASK) {
+ if (reth_len(pkt) > (1U << 31)) {
+ rxe_dbg_qp(qp, "dma length too long");
+ return RESPST_ERR_LENGTH;
+ }
}
+
+ return RESPST_CHK_RKEY;
+}
+
+static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = reth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = reth_rkey(pkt);
+ qp->resp.resid = reth_len(pkt);
+ qp->resp.length = reth_len(pkt);
+}
+
+static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = atmeth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = atmeth_rkey(pkt);
+ qp->resp.resid = sizeof(u64);
}
static enum resp_states check_rkey(struct rxe_qp *qp,
@@ -413,29 +485,32 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
u32 pktlen;
int mtu = qp->mtu;
enum resp_states state;
- int access;
-
- if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
- if (pkt->mask & RXE_RETH_MASK) {
- qp->resp.va = reth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = reth_rkey(pkt);
- qp->resp.resid = reth_len(pkt);
- qp->resp.length = reth_len(pkt);
- }
+ int access = 0;
+
+ if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
+ if (flush_type & IB_FLUSH_GLOBAL)
+ access |= IB_ACCESS_FLUSH_GLOBAL;
+ if (flush_type & IB_FLUSH_PERSISTENT)
+ access |= IB_ACCESS_FLUSH_PERSISTENT;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- qp->resp.va = atmeth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = atmeth_rkey(pkt);
- qp->resp.resid = sizeof(u64);
+ qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
return RESPST_EXECUTE;
}
- /* A zero-byte op is not required to set an addr or rkey. */
+ /* A zero-byte op is not required to set an addr or rkey. See C9-88 */
if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
(pkt->mask & RXE_RETH_MASK) &&
reth_len(pkt) == 0) {
@@ -450,15 +525,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_debug("%s: no MW matches rkey %#x\n",
- __func__, rkey);
+ rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
mr = mw->mr;
if (!mr) {
- pr_err("%s: MW doesn't have an MR\n", __func__);
+ rxe_dbg_qp(qp, "MW doesn't have an MR\n");
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -471,19 +545,27 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_debug("%s: no MR matches rkey %#x\n",
- __func__, rkey);
+ rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
}
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ /* FLUSH MR may not set va or resid
+ * no need to check range since we will flush whole mr
+ */
+ if (feth_sel(pkt) == IB_FLUSH_MR)
+ goto skip_check_range;
+ }
+
if (mr_check_range(mr, va + qp->resp.offset, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
- if (pkt->mask & RXE_WRITE_MASK) {
+skip_check_range:
+ if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
@@ -583,15 +665,66 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
res->state = rdatm_res_state_new;
break;
case RXE_ATOMIC_MASK:
+ case RXE_ATOMIC_WRITE_MASK:
res->first_psn = pkt->psn;
res->last_psn = pkt->psn;
res->cur_psn = pkt->psn;
break;
+ case RXE_FLUSH_MASK:
+ res->flush.va = qp->resp.va + qp->resp.offset;
+ res->flush.length = qp->resp.length;
+ res->flush.type = feth_plt(pkt);
+ res->flush.level = feth_sel(pkt);
}
return res;
}
+static enum resp_states process_flush(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 length, start;
+ struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+
+ /* oA19-14, oA19-15 */
+ if (res && res->replay)
+ return RESPST_ACKNOWLEDGE;
+ else if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
+ qp->resp.res = res;
+ }
+
+ if (res->flush.level == IB_FLUSH_RANGE) {
+ start = res->flush.va;
+ length = res->flush.length;
+ } else { /* level == IB_FLUSH_MR */
+ start = mr->ibmr.iova;
+ length = mr->ibmr.length;
+ }
+
+ if (res->flush.type & IB_FLUSH_PERSISTENT) {
+ if (rxe_flush_pmem_iova(mr, start, length))
+ return RESPST_ERR_RKEY_VIOLATION;
+ /* Make data persistent. */
+ wmb();
+ } else if (res->flush.type & IB_FLUSH_GLOBAL) {
+ /* Make data global visibility. */
+ wmb();
+ }
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
@@ -652,6 +785,55 @@ out:
return ret;
}
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 src, *dst;
+ struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr = qp->resp.mr;
+ int payload = payload_size(pkt);
+
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+ qp->resp.res = res;
+ }
+
+ if (!res->replay) {
+#ifdef CONFIG_64BIT
+ if (mr->state != RXE_MR_STATE_VALID)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ memcpy(&src, payload_addr(pkt), payload);
+
+ dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
+ /* check vaddr is 8 bytes aligned. */
+ if (!dst || (uintptr_t)dst & 7)
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+
+ /* Do atomic write after all prior operations have completed */
+ smp_store_release(dst, src);
+
+ /* decrease resp.resid to zero */
+ qp->resp.resid -= sizeof(payload);
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+#else
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif /* CONFIG_64BIT */
+ }
+
+ return RESPST_ACKNOWLEDGE;
+}
+
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
struct rxe_pkt_info *ack,
int opcode,
@@ -807,14 +989,19 @@ static enum resp_states read_reply(struct rxe_qp *qp,
skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
if (!skb) {
- rxe_put(mr);
+ if (mr)
+ rxe_put(mr);
return RESPST_ERR_RNR;
}
- rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
- payload, RXE_FROM_MR_OBJ);
+ err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
+ payload, RXE_FROM_MR_OBJ);
if (mr)
rxe_put(mr);
+ if (err) {
+ kfree_skb(skb);
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
if (bth_pad(&ack_pkt)) {
u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -890,6 +1077,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
return RESPST_ATOMIC_REPLY;
+ } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ return RESPST_ATOMIC_WRITE_REPLY;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ return RESPST_PROCESS_FLUSH;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -1040,7 +1231,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
- pr_err_ratelimited("Failed sending %s\n", msg);
+ rxe_dbg_qp(qp, "Failed sending %s\n", msg);
return err;
}
@@ -1063,6 +1254,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
return ret;
}
+static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int ret = send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
+ "RDMA READ response of length zero ACK");
+
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+ return ret;
+}
+
static enum resp_states acknowledge(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
@@ -1073,6 +1277,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
+ else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
+ send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@@ -1129,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* SEND. Ack again and cleanup. C9-105. */
send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ struct resp_res *res;
+
+ /* Find the operation in our list of responder resources. */
+ res = find_resource(qp, pkt->psn);
+ if (res) {
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_PROCESS_FLUSH;
+ goto out;
+ }
+
+ /* Resource not found. Class D error. Drop the request. */
+ rc = RESPST_CLEANUP;
+ goto out;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@@ -1184,7 +1406,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->replay = 1;
res->cur_psn = pkt->psn;
qp->resp.res = res;
- rc = RESPST_ATOMIC_REPLY;
+ rc = pkt->mask & RXE_ATOMIC_MASK ?
+ RESPST_ATOMIC_REPLY :
+ RESPST_ATOMIC_WRITE_REPLY;
goto out;
}
@@ -1286,8 +1510,7 @@ int rxe_responder(void *arg)
}
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- resp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
switch (state) {
case RESPST_GET_REQ:
state = get_req(qp, &pkt);
@@ -1305,7 +1528,7 @@ int rxe_responder(void *arg)
state = check_resource(qp, pkt);
break;
case RESPST_CHK_LENGTH:
- state = check_length(qp, pkt);
+ state = rxe_resp_check_length(qp, pkt);
break;
case RESPST_CHK_RKEY:
state = check_rkey(qp, pkt);
@@ -1322,6 +1545,12 @@ int rxe_responder(void *arg)
case RESPST_ATOMIC_REPLY:
state = atomic_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_WRITE_REPLY:
+ state = atomic_write_reply(qp, pkt);
+ break;
+ case RESPST_PROCESS_FLUSH:
+ state = process_flush(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1444,7 +1673,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_debug("qp#%d moved to error state\n", qp_num(qp));
+ rxe_dbg_qp(qp, "moved to error state\n");
rxe_qp_error(qp);
goto exit;
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 02b39498c370..82e37a41ced4 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
struct ib_srq_attr *attr = &init->attr;
if (attr->max_wr > rxe->attr.max_srq_wr) {
- pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
- pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
@@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
attr->max_wr = RXE_MIN_SRQ_WR;
if (attr->max_sge > rxe->attr.max_srq_sge) {
- pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+ rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
@@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
if (!q) {
- pr_warn("unable to allocate queue for srq\n");
+ rxe_dbg_srq(srq, "Unable to allocate queue\n");
return -ENOMEM;
}
@@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
{
if (srq->error) {
- pr_warn("srq in error state\n");
+ rxe_dbg_srq(srq, "in error state\n");
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
- pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
- pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq->limit && (attr->max_wr < srq->limit)) {
- pr_warn("max_wr (%d) < srq->limit (%d)\n",
+ rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
@@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
- pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
+ rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
- pr_warn("srq_limit (%d) > cur limit(%d)\n",
+ rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index ec2b7de1c497..60b90e33a884 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -4,10 +4,6 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/hardirq.h>
-
#include "rxe.h"
int __rxe_do_task(struct rxe_task *task)
@@ -28,30 +24,31 @@ int __rxe_do_task(struct rxe_task *task)
* a second caller finds the task already running
* but looks just after the last call to func
*/
-void rxe_do_task(struct tasklet_struct *t)
+static void do_task(struct tasklet_struct *t)
{
int cont;
int ret;
struct rxe_task *task = from_tasklet(task, t, tasklet);
+ struct rxe_qp *qp = (struct rxe_qp *)task->arg;
unsigned int iterations = RXE_MAX_ITERATIONS;
- spin_lock_bh(&task->state_lock);
+ spin_lock_bh(&task->lock);
switch (task->state) {
case TASK_STATE_START:
task->state = TASK_STATE_BUSY;
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
break;
case TASK_STATE_BUSY:
task->state = TASK_STATE_ARMED;
fallthrough;
case TASK_STATE_ARMED:
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
return;
default:
- spin_unlock_bh(&task->state_lock);
- pr_warn("%s failed with bad state %d\n", __func__, task->state);
+ spin_unlock_bh(&task->lock);
+ rxe_dbg_qp(qp, "failed with bad state %d\n", task->state);
return;
}
@@ -59,7 +56,7 @@ void rxe_do_task(struct tasklet_struct *t)
cont = 0;
ret = task->func(task->arg);
- spin_lock_bh(&task->state_lock);
+ spin_lock_bh(&task->lock);
switch (task->state) {
case TASK_STATE_BUSY:
if (ret) {
@@ -85,27 +82,25 @@ void rxe_do_task(struct tasklet_struct *t)
break;
default:
- pr_warn("%s failed with bad state %d\n", __func__,
- task->state);
+ rxe_dbg_qp(qp, "failed with bad state %d\n",
+ task->state);
}
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
} while (cont);
task->ret = ret;
}
-int rxe_init_task(struct rxe_task *task,
- void *arg, int (*func)(void *), char *name)
+int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
{
task->arg = arg;
task->func = func;
- snprintf(task->name, sizeof(task->name), "%s", name);
task->destroyed = false;
- tasklet_setup(&task->tasklet, rxe_do_task);
+ tasklet_setup(&task->tasklet, do_task);
task->state = TASK_STATE_START;
- spin_lock_init(&task->state_lock);
+ spin_lock_init(&task->lock);
return 0;
}
@@ -121,23 +116,28 @@ void rxe_cleanup_task(struct rxe_task *task)
task->destroyed = true;
do {
- spin_lock_bh(&task->state_lock);
+ spin_lock_bh(&task->lock);
idle = (task->state == TASK_STATE_START);
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
} while (!idle);
tasklet_kill(&task->tasklet);
}
-void rxe_run_task(struct rxe_task *task, int sched)
+void rxe_run_task(struct rxe_task *task)
+{
+ if (task->destroyed)
+ return;
+
+ do_task(&task->tasklet);
+}
+
+void rxe_sched_task(struct rxe_task *task)
{
if (task->destroyed)
return;
- if (sched)
- tasklet_schedule(&task->tasklet);
- else
- rxe_do_task(&task->tasklet);
+ tasklet_schedule(&task->tasklet);
}
void rxe_disable_task(struct rxe_task *task)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 7f612a1c68a7..7b88129702ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -21,11 +21,10 @@ enum {
struct rxe_task {
struct tasklet_struct tasklet;
int state;
- spinlock_t state_lock; /* spinlock for task state */
+ spinlock_t lock;
void *arg;
int (*func)(void *arg);
int ret;
- char name[16];
bool destroyed;
};
@@ -34,8 +33,7 @@ struct rxe_task {
* arg => parameter to pass to fcn
* func => function to call until it returns != 0
*/
-int rxe_init_task(struct rxe_task *task,
- void *arg, int (*func)(void *), char *name);
+int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *));
/* cleanup task */
void rxe_cleanup_task(struct rxe_task *task);
@@ -46,18 +44,9 @@ void rxe_cleanup_task(struct rxe_task *task);
*/
int __rxe_do_task(struct rxe_task *task);
-/*
- * common function called by any of the main tasklets
- * If there is any chance that there is additional
- * work to do someone must reschedule the task before
- * leaving
- */
-void rxe_do_task(struct tasklet_struct *t);
+void rxe_run_task(struct rxe_task *task);
-/* run a task, else schedule it to run as a tasklet, The decision
- * to run or schedule tasklet is based on the parameter sched.
- */
-void rxe_run_task(struct rxe_task *task, int sched);
+void rxe_sched_task(struct rxe_task *task);
/* keep a task from scheduling */
void rxe_disable_task(struct rxe_task *task);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 88825edc7dce..025b35bf014e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah,
ah->is_user = false;
}
- err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
- if (err)
- return err;
-
err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
@@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah,
/* create index > 0 */
ah->ah_num = ah->elem.index;
+ err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
+ if (err) {
+ rxe_cleanup(ah);
+ return err;
+ }
+
if (uresp) {
/* only if new user provider */
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
@@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah,
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
int err;
- struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
- err = rxe_av_chk_attr(rxe, attr);
+ err = rxe_ah_chk_attr(ah, attr);
if (err)
return err;
@@ -238,7 +239,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
- int err;
int i;
u32 length;
struct rxe_recv_wqe *recv_wqe;
@@ -246,15 +246,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
int full;
full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
- if (unlikely(full)) {
- err = -ENOMEM;
- goto err1;
- }
+ if (unlikely(full))
+ return -ENOMEM;
- if (unlikely(num_sge > rq->max_sge)) {
- err = -EINVAL;
- goto err1;
- }
+ if (unlikely(num_sge > rq->max_sge))
+ return -EINVAL;
length = 0;
for (i = 0; i < num_sge; i++)
@@ -275,9 +271,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
return 0;
-
-err1:
- return err;
}
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
@@ -343,10 +336,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (err)
return err;
- err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
- if (err)
- return err;
- return 0;
+ return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@@ -453,11 +443,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = rxe_qp_chk_attr(rxe, qp, attr, mask);
if (err)
- goto err1;
+ return err;
err = rxe_qp_from_attr(qp, attr, mask, udata);
if (err)
- goto err1;
+ return err;
if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
@@ -465,9 +455,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->attr.dest_qp_num);
return 0;
-
-err1:
- return err;
}
static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -501,24 +488,21 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
struct rxe_sq *sq = &qp->sq;
if (unlikely(num_sge > sq->max_sge))
- goto err1;
+ return -EINVAL;
if (unlikely(mask & WR_ATOMIC_MASK)) {
if (length < 8)
- goto err1;
+ return -EINVAL;
if (atomic_wr(ibwr)->remote_addr & 0x7)
- goto err1;
+ return -EINVAL;
}
if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
(length > sq->max_inline)))
- goto err1;
+ return -EINVAL;
return 0;
-
-err1:
- return -EINVAL;
}
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
@@ -695,9 +679,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
wr = next;
}
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
if (unlikely(qp->req.state == QP_STATE_ERROR))
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
return err;
}
@@ -719,7 +703,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->is_user) {
/* Utilize process context to do protocol processing */
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
return 0;
} else
return rxe_post_send_kernel(qp, wr, bad_wr);
@@ -735,14 +719,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
*bad_wr = wr;
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
if (unlikely(qp->srq)) {
*bad_wr = wr;
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
spin_lock_irqsave(&rq->producer_lock, flags);
@@ -759,9 +741,8 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
if (qp->resp.state == QP_STATE_ERROR)
- rxe_run_task(&qp->resp.task, 1);
+ rxe_sched_task(&qp->resp.task);
-err1:
return err;
}
@@ -826,16 +807,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
if (err)
- goto err1;
-
- err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
- if (err)
- goto err1;
-
- return 0;
+ return err;
-err1:
- return err;
+ return rxe_cq_resize_queue(cq, cqe, uresp, udata);
}
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
@@ -902,6 +876,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
rxe_get(pd);
mr->ibmr.pd = ibpd;
+ mr->ibmr.device = ibpd->device;
rxe_mr_init_dma(access, mr);
rxe_finalize(mr);
@@ -921,26 +896,23 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
struct rxe_mr *mr;
mr = rxe_alloc(&rxe->mr_pool);
- if (!mr) {
- err = -ENOMEM;
- goto err2;
- }
-
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
rxe_get(pd);
mr->ibmr.pd = ibpd;
+ mr->ibmr.device = ibpd->device;
err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
if (err)
- goto err3;
+ goto err1;
rxe_finalize(mr);
return &mr->ibmr;
-err3:
+err1:
rxe_cleanup(mr);
-err2:
return ERR_PTR(err);
}
@@ -956,25 +928,23 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
return ERR_PTR(-EINVAL);
mr = rxe_alloc(&rxe->mr_pool);
- if (!mr) {
- err = -ENOMEM;
- goto err1;
- }
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
rxe_get(pd);
mr->ibmr.pd = ibpd;
+ mr->ibmr.device = ibpd->device;
err = rxe_mr_init_fast(max_num_sg, mr);
if (err)
- goto err2;
+ goto err1;
rxe_finalize(mr);
return &mr->ibmr;
-err2:
- rxe_cleanup(mr);
err1:
+ rxe_cleanup(mr);
return ERR_PTR(err);
}
@@ -1134,7 +1104,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
err = ib_register_device(dev, ibdev_name, NULL);
if (err)
- pr_warn("%s failed with error %d\n", __func__, err);
+ rxe_dbg(rxe, "failed with error %d\n", err);
/*
* Note that rxe may be invalid at this point if another thread
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5f5cbfcb3569..19ddfa890480 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -165,6 +165,12 @@ struct resp_res {
u64 va;
u32 resid;
} read;
+ struct {
+ u32 length;
+ u64 va;
+ u8 type;
+ u8 level;
+ } flush;
};
};
@@ -304,7 +310,6 @@ struct rxe_mr {
u32 lkey;
u32 rkey;
enum rxe_mr_state state;
- enum ib_mr_type type;
u32 offset;
int access;
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index d68e37859e73..403029de6b92 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
memset(wc, 0, sizeof(*wc));
wc->wr_id = cqe->id;
- wc->status = map_cqe_status[cqe->status].ib;
- wc->opcode = map_wc_opcode[cqe->opcode];
wc->byte_len = cqe->bytes;
/*
@@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
wc->wc_flags = IB_WC_WITH_INVALIDATE;
}
wc->qp = cqe->base_qp;
+ wc->opcode = map_wc_opcode[cqe->opcode];
+ wc->status = map_cqe_status[cqe->status].ib;
siw_dbg_cq(cq,
"idx %u, type %d, flags %2x, id 0x%pK\n",
cq->cq_get % cq->num_cqe, cqe->opcode,
cqe->flags, (void *)(uintptr_t)cqe->id);
+ } else {
+ /*
+ * A malicious user may set invalid opcode or
+ * status in the user mmapped CQE array.
+ * Sanity check and correct values in that case
+ * to avoid out-of-bounds access to global arrays
+ * for opcode and status mapping.
+ */
+ u8 opcode = cqe->opcode;
+ u16 status = cqe->status;
+
+ if (opcode >= SIW_NUM_OPCODES) {
+ opcode = 0;
+ status = SIW_WC_GENERAL_ERR;
+ } else if (status >= SIW_NUM_WC_STATUS) {
+ status = SIW_WC_GENERAL_ERR;
+ }
+ wc->opcode = map_wc_opcode[opcode];
+ wc->status = map_cqe_status[status].ib;
+
}
WRITE_ONCE(cqe->flags, 0);
cq->cq_get++;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 3e814cfb298c..906fde1a2a0d 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
- struct siw_sqe sqe = {};
int rv = 0;
while (wr) {
- sqe.id = wr->wr_id;
- sqe.opcode = wr->opcode;
- rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+ struct siw_sqe sqe = {};
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ sqe.opcode = SIW_OP_WRITE;
+ break;
+ case IB_WR_RDMA_READ:
+ sqe.opcode = SIW_OP_READ;
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ sqe.opcode = SIW_OP_READ_LOCAL_INV;
+ break;
+ case IB_WR_SEND:
+ sqe.opcode = SIW_OP_SEND;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqe.opcode = SIW_OP_SEND_WITH_IMM;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ sqe.opcode = SIW_OP_SEND_REMOTE_INV;
+ break;
+ case IB_WR_LOCAL_INV:
+ sqe.opcode = SIW_OP_INVAL_STAG;
+ break;
+ case IB_WR_REG_MR:
+ sqe.opcode = SIW_OP_REG_MR;
+ break;
+ default:
+ rv = -EINVAL;
+ break;
+ }
+ if (!rv) {
+ sqe.id = wr->wr_id;
+ rv = siw_sqe_complete(qp, &sqe, 0,
+ SIW_WC_WR_FLUSH_ERR);
+ }
if (rv) {
if (bad_wr)
*bad_wr = wr;