summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorMike Marciniszyn <mike.marciniszyn@intel.com>2016-02-14 12:10:04 -0800
committerDoug Ledford <dledford@redhat.com>2016-03-10 20:38:07 -0500
commit46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 (patch)
tree077ea38ac2f7fd75c1334deadcc141ad6753a009 /drivers/infiniband/hw
parent20f333b61300fa658952713ca9b8b4b72bbaed9f (diff)
downloadlinux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.bz2
IB/qib, staging/rdma/hfi1: add s_hlock for use in post send
This patch adds an additional lock to reduce contention on the s_lock. This lock is used in post_send() so that the post_send is not serialized with the send engine and other send related processing. To do this the s_next_psn is now maintained on post_send() while post_send() related fields are moved to a new cache line. There is an s_avail maintained for the post_send() to mitigate trading cache lines with the send engine. The lock is released/acquired around releasing the just built packet to the egress mechanism. Reviewed-by: Jubin John <jubin.john@intel.com> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Dean Luick <dean.luick@intel.com> Signed-off-by: Harish Chegondi <harish.chegondi@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c36
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c44
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c22
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c22
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c37
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h6
7 files changed, 103 insertions, 75 deletions
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 01d49dc91de2..6ffa0221da9f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth)
}
}
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wr/wqe. This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ struct rvt_ah *ah;
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ if (wqe->length > 0x80000000U)
+ return -EINVAL;
+ break;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ if (wqe->length > (1 << ah->log_pmtu))
+ return -EINVAL;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
#ifdef CONFIG_DEBUG_FS
struct qib_qp_iter {
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index ce886b2ade74..9088e26d3ac8 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -226,6 +226,8 @@ bail:
* qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
*
+ * Assumes the s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
int qib_make_rc_req(struct rvt_qp *qp)
@@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
u32 bth2;
u32 pmtu = qp->pmtu;
char newreq;
- unsigned long flags;
int ret = 0;
int delta;
@@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &priv->s_hdr->u.l.oth;
- /*
- * The lock is needed to synchronize between the sending tasklet,
- * the receive interrupt handler, and timeout resends.
- */
- spin_lock_irqsave(&qp->s_lock, flags);
-
/* Sending responses has higher priority over sending requests. */
if ((qp->s_flags & RVT_S_RESP_PENDING) &&
qib_make_rc_ack(dev, qp, ohdr, pmtu))
@@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) {
@@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
- wqe->psn = qp->s_next_psn;
newreq = 1;
+ qp->s_psn = wqe->psn;
}
/*
* Note that we have to be careful not to modify the
@@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
@@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
- wqe->lpsn = wqe->psn;
if (len > pmtu) {
- wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
@@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- /*
- * Adjust s_next_psn to count the
- * expected number of responses.
- */
- if (len > pmtu)
- qp->s_next_psn += (len - 1) / pmtu;
- wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
@@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- wqe->lpsn = wqe->psn;
}
if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
@@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
}
if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1;
- else {
+ else
qp->s_psn++;
- if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
- }
break;
case OP(RDMA_READ_RESPONSE_FIRST):
@@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = qp->s_psn++ & QIB_PSN_MASK;
- if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = qp->s_psn++ & QIB_PSN_MASK;
- if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
@@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_cur_size = len;
qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
qp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
goto ack_done;
/* Ignore invalid responses. */
- if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 2623684745f0..a5f07a64b228 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp)
sqp->s_flags |= RVT_S_BUSY;
again:
- if (sqp->s_last == sqp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
goto clr_busy;
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
@@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp)
qp->s_flags |= RVT_S_BUSY;
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
do {
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
/*
* If the packet cannot be sent now, return and
* the send tasklet will be woken up later.
*/
if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size))
- break;
+ return;
/* Record that s_hdr is empty. */
qp->s_hdrwords = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
}
} while (make_req(qp));
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
/*
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 1b2fc69855b2..7bdbc79ceaa3 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -41,6 +41,8 @@
* qib_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
*
+ * Assumes the s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
int qib_make_uc_req(struct rvt_qp *qp)
@@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp)
struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
struct rvt_swqe *wqe;
- unsigned long flags;
u32 hwords;
u32 bth0;
u32 len;
u32 pmtu = qp->pmtu;
int ret = 0;
- spin_lock_irqsave(&qp->s_lock, flags);
-
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) {
@@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp)
RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
- if (qp->s_cur == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail;
/*
* Start a new request.
*/
- wqe->psn = qp->s_next_psn;
- qp->s_psn = qp->s_next_psn;
+ qp->s_psn = wqe->psn;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp)
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
- qp->s_next_psn++ & QIB_PSN_MASK);
+ qp->s_psn++ & QIB_PSN_MASK);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
qp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index fe4917272b89..d9502137de62 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -234,6 +234,8 @@ drop:
* qib_make_ud_req - construct a UD request packet
* @qp: the QP
*
+ * Assumes the s_lock is held.
+ *
* Return 1 if constructed; otherwise, return 0.
*/
int qib_make_ud_req(struct rvt_qp *qp)
@@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp)
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
struct rvt_swqe *wqe;
- unsigned long flags;
u32 nwords;
u32 extra_bytes;
u32 bth0;
@@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp)
int ret = 0;
int next_cur;
- spin_lock_irqsave(&qp->s_lock, flags);
-
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- if (qp->s_last == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send */
+ if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) {
@@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
goto done;
}
- if (qp->s_cur == qp->s_head)
+ /* see post_one_send() */
+ smp_read_barrier_depends();
+ if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail;
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
@@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
+ unsigned long flags;
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
@@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
goto bail;
}
qp->s_cur = next_cur;
+ local_irq_save(flags);
spin_unlock_irqrestore(&qp->s_lock, flags);
qib_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
cpu_to_be32(QIB_MULTICAST_QPN) :
cpu_to_be32(wqe->ud_wr.remote_qpn);
- ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+ ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
@@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
- ret = 1;
- goto unlock;
-
+ return 1;
bail:
qp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fa94f78073cf..5cf019fb50d9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
+ dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
+ dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
@@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
dev->pio_hdrs, dev->pio_hdrs_phys);
}
-/*
- * This must be called with s_lock held.
+/**
+ * _qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules progress w/o regard to the s_flags.
+ *
+ * It is only used in post send, which doesn't hold
+ * the s_lock.
*/
-void qib_schedule_send(struct rvt_qp *qp)
+void _qib_schedule_send(struct rvt_qp *qp)
{
+ struct qib_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct qib_qp_priv *priv = qp->priv;
- if (qib_send_ok(qp)) {
- struct qib_ibport *ibp =
- to_iport(qp->ibqp.device, qp->port_num);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- queue_work(ppd->qib_wq, &priv->s_work);
- }
+ queue_work(ppd->qib_wq, &priv->s_work);
+}
+
+/**
+ * qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules qp progress. The s_lock
+ * should be held.
+ */
+void qib_schedule_send(struct rvt_qp *qp)
+{
+ if (qib_send_ok(qp))
+ _qib_schedule_send(qp);
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index b88e027b6cb0..d137d714935d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp)
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
-/*
- * This must be called with s_lock held.
- */
+void _qib_schedule_send(struct rvt_qp *qp);
void qib_schedule_send(struct rvt_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
@@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
void qib_rc_rnr_retry(unsigned long arg);