From 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn
Date: Sun, 14 Feb 2016 12:10:04 -0800
Subject: IB/qib, staging/rdma/hfi1: add s_hlock for use in post send

This patch adds an additional lock to reduce contention on the s_lock.

This lock is used in post_send() so that post_send() is not serialized
with the send engine and other send-related processing.

To do this, s_next_psn is now maintained on post_send() while the
post_send() related fields are moved to a new cache line.  An s_avail
field is maintained for post_send() to mitigate trading cache lines
with the send engine.  The s_lock is released/acquired around handing
the just-built packet to the egress mechanism.

Reviewed-by: Jubin John
Reviewed-by: Dennis Dalessandro
Signed-off-by: Dean Luick
Signed-off-by: Harish Chegondi
Signed-off-by: Mike Marciniszyn
Signed-off-by: Ira Weiny
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/qib/qib_qp.c    | 36 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/qib/qib_rc.c    | 44 +++++++----------------------------
 drivers/infiniband/hw/qib/qib_ruc.c   | 11 +++++----
 drivers/infiniband/hw/qib/qib_uc.c    | 22 +++++++-----------
 drivers/infiniband/hw/qib/qib_ud.c    | 22 +++++++++---------
 drivers/infiniband/hw/qib/qib_verbs.c | 37 ++++++++++++++++++++++-------
 drivers/infiniband/hw/qib/qib_verbs.h |  6 ++---
 7 files changed, 103 insertions(+), 75 deletions(-)
(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 01d49dc91de2..6ffa0221da9f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth)
 	}
 }
 
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wr/wqe. This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+		       struct rvt_swqe *wqe)
+{
+	struct rvt_ah *ah;
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
+		break;
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 struct qib_qp_iter {
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index ce886b2ade74..9088e26d3ac8 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -226,6 +226,8 @@ bail:
  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
 int qib_make_rc_req(struct rvt_qp *qp)
@@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
 	int ret = 0;
 	int delta;
 
@@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
 		ohdr = &priv->s_hdr->u.l.oth;
 
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-
 	/* Sending responses has higher priority over sending requests. */
 	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
 		goto done;
@@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		if (atomic_read(&priv->s_dma_busy)) {
@@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
@@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
@@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_num_rd_atomic++;
 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 
 			ohdr->u.rc.reth.vaddr =
@@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_num_rd_atomic++;
 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp)
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 	qp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
@@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 		goto ack_done;
 
 	/* Ignore invalid responses. */
-	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 2623684745f0..a5f07a64b228 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp)
 	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
 	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
@@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp)
 
 	qp->s_flags |= RVT_S_BUSY;
 
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
 			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
 					   qp->s_cur_sge, qp->s_cur_size))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
 	} while (make_req(qp));
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 1b2fc69855b2..7bdbc79ceaa3 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -41,6 +41,8 @@
  * qib_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
 int qib_make_uc_req(struct rvt_qp *qp)
@@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp)
 	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
-	unsigned long flags;
 	u32 hwords;
 	u32 bth0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
 	int ret = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		if (atomic_read(&priv->s_dma_busy)) {
@@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp)
 	    RVT_PROCESS_NEXT_SEND_OK))
 		goto bail;
 	/* Check if send work queue is empty. */
-	if (qp->s_cur == qp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 	/*
 	 * Start a new request.
 	 */
-	wqe->psn = qp->s_next_psn;
-	qp->s_psn = qp->s_next_psn;
+	qp->s_psn = wqe->psn;
 	qp->s_sge.sge = wqe->sg_list[0];
 	qp->s_sge.sg_list = wqe->sg_list + 1;
 	qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp)
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			    qp->s_next_psn++ & QIB_PSN_MASK);
+			    qp->s_psn++ & QIB_PSN_MASK);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 	qp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index fe4917272b89..d9502137de62 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -234,6 +234,8 @@ drop:
  * qib_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
 int qib_make_ud_req(struct rvt_qp *qp)
@@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp)
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
 	struct rvt_swqe *wqe;
-	unsigned long flags;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
@@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp)
 	int ret = 0;
 	int next_cur;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		if (atomic_read(&priv->s_dma_busy)) {
@@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
 		goto done;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
 	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
@@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
 			this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid == ppd->lid)) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qib_ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
 		ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
 			cpu_to_be32(QIB_MULTICAST_QPN) :
 			cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+	ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -388,13 +392,9 @@
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 	qp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fa94f78073cf..5cf019fb50d9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
 	dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
 	dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
 	dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
 	dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
 	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
 	dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
 	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
 	dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
 	dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
 	dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
@@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
 					dev->pio_hdrs, dev->pio_hdrs_phys);
 }
 
-/*
- * This must be called with s_lock held.
+/**
+ * _qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules progress w/o regard to the s_flags.
+ *
+ * It is only used in post send, which doesn't hold
+ * the s_lock.
  */
-void qib_schedule_send(struct rvt_qp *qp)
+void _qib_schedule_send(struct rvt_qp *qp)
 {
+	struct qib_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	struct qib_qp_priv *priv = qp->priv;
 
-	if (qib_send_ok(qp)) {
-		struct qib_ibport *ibp =
-			to_iport(qp->ibqp.device, qp->port_num);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-		queue_work(ppd->qib_wq, &priv->s_work);
-	}
+	queue_work(ppd->qib_wq, &priv->s_work);
+}
+
+/**
+ * qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules qp progress. The s_lock
+ * should be held.
+ */
+void qib_schedule_send(struct rvt_qp *qp)
+{
+	if (qib_send_ok(qp))
+		_qib_schedule_send(qp);
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index b88e027b6cb0..d137d714935d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp)
 		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
-/*
- * This must be called with s_lock held.
- */
+void _qib_schedule_send(struct rvt_qp *qp);
 void qib_schedule_send(struct rvt_qp *qp);
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
@@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
 void qib_rc_rnr_retry(unsigned long arg);
--
cgit v1.2.3
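
Note on the locking split (not part of the patch): the sketch below models, in
userspace C11, the single-producer/single-consumer handoff the patch relies on.
The posting side appends WQEs under its own lock (the patch's s_hlock) and
publishes the new head; the send engine consumes under a separate lock,
re-reading the head each pass. Names such as struct sendq, post_one_send() and
run_send_engine() are invented for illustration, and C11 atomics plus pthread
mutexes stand in for the kernel's spinlocks, ACCESS_ONCE() and
smp_read_barrier_depends().

/* Illustrative sketch only -- gcc -std=c11 sketch.c -lpthread */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 16			/* power of two, like a QP send queue */

struct swqe { uint32_t psn; uint32_t length; };

struct sendq {
	struct swqe	 wqe[RING_SIZE];
	_Atomic uint32_t head;		/* written only by the posting side */
	_Atomic uint32_t tail;		/* written only by the send engine  */
	pthread_mutex_t	 hlock;		/* models qp->s_hlock */
	pthread_mutex_t	 slock;		/* models qp->s_lock  */
};

/* Posting side: serialized against other posters, not the send engine. */
static int post_one_send(struct sendq *q, uint32_t psn, uint32_t len)
{
	pthread_mutex_lock(&q->hlock);
	uint32_t head = atomic_load_explicit(&q->head, memory_order_relaxed);
	uint32_t tail = atomic_load_explicit(&q->tail, memory_order_relaxed);

	if (((head + 1) & (RING_SIZE - 1)) == tail) {
		pthread_mutex_unlock(&q->hlock);
		return -1;		/* ring full */
	}
	q->wqe[head] = (struct swqe){ .psn = psn, .length = len };
	/* Release: the WQE contents become visible before the new head does. */
	atomic_store_explicit(&q->head, (head + 1) & (RING_SIZE - 1),
			      memory_order_release);
	pthread_mutex_unlock(&q->hlock);
	return 0;
}

/* Send engine: consumes under its own lock, re-reading head each pass.
 * In the driver, qib_do_send() additionally drops its lock around the
 * actual hardware send and reacquires it before building the next packet. */
static void run_send_engine(struct sendq *q)
{
	pthread_mutex_lock(&q->slock);
	for (;;) {
		uint32_t head = atomic_load_explicit(&q->head,
						     memory_order_acquire);
		uint32_t tail = atomic_load_explicit(&q->tail,
						     memory_order_relaxed);
		if (tail == head)
			break;		/* nothing more to send */
		struct swqe *wqe = &q->wqe[tail];
		printf("sending psn %u len %u\n",
		       (unsigned)wqe->psn, (unsigned)wqe->length);
		atomic_store_explicit(&q->tail, (tail + 1) & (RING_SIZE - 1),
				      memory_order_relaxed);
	}
	pthread_mutex_unlock(&q->slock);
}

int main(void)
{
	struct sendq q = {
		.hlock = PTHREAD_MUTEX_INITIALIZER,
		.slock = PTHREAD_MUTEX_INITIALIZER,
	};

	post_one_send(&q, 1, 1024);
	post_one_send(&q, 2, 2048);
	run_send_engine(&q);
	return 0;
}

In the driver the two sides run concurrently (posting from ib_post_send(),
the engine from a workqueue), and the patch additionally keeps an s_avail
count on the posting side so the producer rarely has to read the consumer's
indices, which is the cache-line-trading concern mentioned in the commit
message above.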