From 21bfd4706268c85c9d17ca2fd48de3b19c9d40db Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 18 Jun 2013 10:27:38 +0300 Subject: RDMA/cxgb3: Timeout condition is never true This is a static checker fix. "count" is unsigned so it's never -1. Since "count" is 16 bits and the addition operation is implicitly casted to int then there is no wrapping here. Signed-off-by: Dan Carpenter Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index e5649e8b215d..b57c0befd962 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -883,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp) { union t3_wr *wqe = qhp->wq.queue; u16 count = 0; - while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + + while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { count++; wqe++; } -- cgit v1.2.3 From f29fa1cf340e21085672ccfeb6d66f292208567e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 19 Jun 2013 10:40:09 +0800 Subject: IB/ehca: Fix error return code in ehca_create_slab_caches() Fix to return -ENOMEM in the kmem_cache_create() error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index f8a62918a88d..8f43093edace 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -211,6 +211,7 @@ static int ehca_create_slab_caches(void) if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; goto create_slab_caches6; } #endif -- cgit v1.2.3 From 27159f5087f9ff59fdc42958a31bca3a291b9f67 Mon Sep 17 00:00:00 2001 From: "Gottumukkala, Naresh" Date: Wed, 5 Jun 2013 08:50:46 +0000 Subject: RDMA/ocrdma: Remove use_cnt for queues Remove use_cnt. Rely on IB midlayer to keep track of the use count. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 4 --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 39 +---------------------------- 3 files changed, 1 insertion(+), 43 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 48970af23679..21d99f6fb367 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -97,7 +97,6 @@ struct ocrdma_queue_info { u16 id; /* qid, where to ring the doorbell. */ u16 head, tail; bool created; - atomic_t used; /* Number of valid elements in the queue */ }; struct ocrdma_eq { @@ -198,7 +197,6 @@ struct ocrdma_cq { struct ocrdma_ucontext *ucontext; dma_addr_t pa; u32 len; - atomic_t use_cnt; /* head of all qp's sq and rq for which cqes need to be flushed * by the software. @@ -210,7 +208,6 @@ struct ocrdma_pd { struct ib_pd ibpd; struct ocrdma_dev *dev; struct ocrdma_ucontext *uctx; - atomic_t use_cnt; u32 id; int num_dpp_qp; u32 dpp_page; @@ -246,7 +243,6 @@ struct ocrdma_srq { struct ocrdma_qp_hwq_info rq; struct ocrdma_pd *pd; - atomic_t use_cnt; u32 id; u64 *rqe_wr_id_tbl; u32 *idx_bit_fields; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 71942af4fce9..910b706a91c7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -128,7 +128,6 @@ static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev) static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) { dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1); - atomic_inc(&dev->mq.sq.used); } static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b29a4246ef41..38c145b28f5c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -398,7 +398,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, kfree(pd); return ERR_PTR(status); } - atomic_set(&pd->use_cnt, 0); if (udata && context) { status = ocrdma_copy_pd_uresp(pd, context, udata); @@ -419,12 +418,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) int status; u64 usr_db; - if (atomic_read(&pd->use_cnt)) { - ocrdma_err("%s(%d) pd=0x%x is in use.\n", - __func__, dev->id, pd->id); - status = -EFAULT; - goto dealloc_err; - } status = ocrdma_mbx_dealloc_pd(dev, pd); if (pd->uctx) { u64 dpp_db = dev->nic_info.dpp_unmapped_addr + @@ -436,7 +429,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); } kfree(pd); -dealloc_err: return status; } @@ -474,7 +466,6 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); } mr->pd = pd; - atomic_inc(&pd->use_cnt); mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; @@ -664,7 +655,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, if (status) goto mbx_err; mr->pd = pd; - atomic_inc(&pd->use_cnt); mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; @@ -689,7 +679,6 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) if (mr->hwmr.fr_mr == 0) ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); - atomic_dec(&mr->pd->use_cnt); /* it could be user registered memory. */ if (mr->umem) ib_umem_release(mr->umem); @@ -752,7 +741,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->cq_lock); spin_lock_init(&cq->comp_handler_lock); - atomic_set(&cq->use_cnt, 0); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); cq->dev = dev; @@ -799,9 +787,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = cq->dev; - if (atomic_read(&cq->use_cnt)) - return -EINVAL; - status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { @@ -1023,15 +1008,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->state = OCRDMA_QPS_RST; } -static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd) -{ - atomic_inc(&pd->use_cnt); - atomic_inc(&qp->sq_cq->use_cnt); - atomic_inc(&qp->rq_cq->use_cnt); - if (qp->srq) - atomic_inc(&qp->srq->use_cnt); - qp->ibqp.qp_num = qp->id; -} static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) @@ -1099,7 +1075,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, goto cpy_err; } ocrdma_store_gsi_qp_cq(dev, attrs); - ocrdma_set_qp_use_cnt(qp, pd); + qp->ibqp.qp_num = qp->id; mutex_unlock(&dev->dev_lock); return &qp->ibqp; @@ -1475,11 +1451,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) ocrdma_del_flush_qp(qp); - atomic_dec(&qp->pd->use_cnt); - atomic_dec(&qp->sq_cq->use_cnt); - atomic_dec(&qp->rq_cq->use_cnt); - if (qp->srq) - atomic_dec(&qp->srq->use_cnt); kfree(qp->wqe_wr_id_tbl); kfree(qp->rqe_wr_id_tbl); kfree(qp); @@ -1565,14 +1536,12 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, goto arm_err; } - atomic_set(&srq->use_cnt, 0); if (udata) { status = ocrdma_copy_srq_uresp(srq, udata); if (status) goto arm_err; } - atomic_inc(&pd->use_cnt); return &srq->ibsrq; arm_err: @@ -1618,18 +1587,12 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) srq = get_ocrdma_srq(ibsrq); dev = srq->dev; - if (atomic_read(&srq->use_cnt)) { - ocrdma_err("%s(%d) err, srq=0x%x in use\n", - __func__, dev->id, srq->id); - return -EAGAIN; - } status = ocrdma_mbx_destroy_srq(dev, srq); if (srq->pd->uctx) ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len); - atomic_dec(&srq->pd->use_cnt); kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); kfree(srq); -- cgit v1.2.3 From b1d58b99194a121a44ec77571f84f62a6ccd6431 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 10 Jun 2013 04:42:38 +0000 Subject: RDMA/ocrdma: Use MCC_CREATE_EXT_V1 for MCC create Use MCC_CREATE_EXT_V1 to create MCC_queue to receive RoCE events. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 39 +++++++++++-------------------- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 20 ++-------------- 2 files changed, 15 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 910b706a91c7..f671d5d9ce3a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -563,32 +563,19 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, memset(cmd, 0, sizeof(*cmd)); num_pages = PAGES_4K_SPANNED(mq->va, mq->size); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ, - OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); - cmd->v0.pages = num_pages; - cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->v0.async_cqid_valid = (cq->id << 1); - cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << - OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); - cmd->v0.cqid_ringsize |= - (cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT); - cmd->v0.valid = OCRDMA_CREATE_MQ_VALID; - pa = &cmd->v0.pa[0]; - } else { - ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT, - OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); - cmd->req.rsvd_version = 1; - cmd->v1.cqid_pages = num_pages; - cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); - cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->v1.async_event_bitmap = Bit(20); - cmd->v1.async_cqid_ringsize = cq->id; - cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << - OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); - cmd->v1.valid = OCRDMA_CREATE_MQ_VALID; - pa = &cmd->v1.pa[0]; - } + ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + cmd->req.rsvd_version = 1; + cmd->cqid_pages = num_pages; + cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); + cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; + cmd->async_event_bitmap = Bit(20); + cmd->async_cqid_ringsize = cq->id; + cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << + OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); + cmd->valid = OCRDMA_CREATE_MQ_VALID; + pa = &cmd->pa[0]; + ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K); status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL, NULL); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index c75cbdfa87e7..cd0512f1fb5b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -608,16 +608,8 @@ enum { OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) }; -struct ocrdma_create_mq_v0 { - u32 pages; - u32 cqid_ringsize; - u32 valid; - u32 async_cqid_valid; - u32 rsvd; - struct ocrdma_pa pa[8]; -} __packed; - -struct ocrdma_create_mq_v1 { +struct ocrdma_create_mq_req { + struct ocrdma_mbx_hdr req; u32 cqid_pages; u32 async_event_bitmap; u32 async_cqid_ringsize; @@ -627,14 +619,6 @@ struct ocrdma_create_mq_v1 { struct ocrdma_pa pa[8]; } __packed; -struct ocrdma_create_mq_req { - struct ocrdma_mbx_hdr req; - union { - struct ocrdma_create_mq_v0 v0; - struct ocrdma_create_mq_v1 v1; - }; -} __packed; - struct ocrdma_create_mq_rsp { struct ocrdma_mbx_rsp rsp; u32 id; -- cgit v1.2.3 From ef99c4c2ed63cb0deb94ea70fb47c2d6294e302e Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 10 Jun 2013 04:42:39 +0000 Subject: RDMA/ocrdma: Replace ocrdma_err with pr_err Remove private macro ocrdma_err and replace with standard pr_err. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 - drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 44 +++++++------- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 6 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 92 ++++++++++++++--------------- 4 files changed, 70 insertions(+), 74 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 21d99f6fb367..9d82d097e25a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -42,8 +42,6 @@ #define OCRDMA_ROCE_DEV_VERSION "1.0.0" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" -#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg) - #define OCRDMA_MAX_AH 512 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index f671d5d9ce3a..76c9e192ddcc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -731,7 +731,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, qp_event = 0; srq_event = 0; dev_event = 0; - ocrdma_err("%s() unknown type=0x%x\n", __func__, type); + pr_err("%s() unknown type=0x%x\n", __func__, type); break; } @@ -761,8 +761,8 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) if (evt_code == OCRDMA_ASYNC_EVE_CODE) ocrdma_dispatch_ibevent(dev, cqe); else - ocrdma_err("%s(%d) invalid evt code=0x%x\n", - __func__, dev->id, evt_code); + pr_err("%s(%d) invalid evt code=0x%x\n", __func__, + dev->id, evt_code); } static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) @@ -776,8 +776,8 @@ static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) dev->mqe_ctx.cmd_done = true; wake_up(&dev->mqe_ctx.cmd_wait); } else - ocrdma_err("%s() cqe for invalid tag0x%x.expected=0x%x\n", - __func__, cqe->tag_lo, dev->mqe_ctx.tag); + pr_err("%s() cqe for invalid tag0x%x.expected=0x%x\n", + __func__, cqe->tag_lo, dev->mqe_ctx.tag); } static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) @@ -796,7 +796,7 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) ocrdma_process_mcqe(dev, cqe); else - ocrdma_err("%s() cqe->compl is not set.\n", __func__); + pr_err("%s() cqe->compl is not set.\n", __func__); memset(cqe, 0, sizeof(struct ocrdma_mcqe)); ocrdma_mcq_inc_tail(dev); } @@ -855,7 +855,7 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx) cq = dev->cq_tbl[cq_idx]; if (cq == NULL) { - ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); + pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); return; } spin_lock_irqsave(&cq->cq_lock, flags); @@ -957,7 +957,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) rsp = ocrdma_get_mqe_rsp(dev); ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); if (cqe_status || ext_status) { - ocrdma_err + pr_err ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", __func__, (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> @@ -1339,8 +1339,8 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, if (dpp_cq) return -EINVAL; if (entries > dev->attr.max_cqe) { - ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", - __func__, dev->id, dev->attr.max_cqe, entries); + pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", + __func__, dev->id, dev->attr.max_cqe, entries); return -EINVAL; } if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) @@ -1607,7 +1607,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev, status = ocrdma_mbx_reg_mr(dev, hwmr, pdid, cur_pbl_cnt, hwmr->pbe_size, last); if (status) { - ocrdma_err("%s() status=%d\n", __func__, status); + pr_err("%s() status=%d\n", __func__, status); return status; } /* if there is no more pbls to register then exit. */ @@ -1630,7 +1630,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev, break; } if (status) - ocrdma_err("%s() err. status=%d\n", __func__, status); + pr_err("%s() err. status=%d\n", __func__, status); return status; } @@ -1827,8 +1827,8 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd, status = ocrdma_build_q_conf(&max_wqe_allocated, dev->attr.wqe_size, &hw_pages, &hw_page_size); if (status) { - ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__, - max_wqe_allocated); + pr_err("%s() req. max_send_wr=0x%x\n", __func__, + max_wqe_allocated); return -EINVAL; } qp->sq.max_cnt = max_wqe_allocated; @@ -1877,8 +1877,8 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd, status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size, &hw_pages, &hw_page_size); if (status) { - ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__, - attrs->cap.max_recv_wr + 1); + pr_err("%s() req. max_recv_wr=0x%x\n", __func__, + attrs->cap.max_recv_wr + 1); return status; } qp->rq.max_cnt = max_rqe_allocated; @@ -2073,10 +2073,10 @@ mbx_err: if (qp->rq.va) dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa); rq_err: - ocrdma_err("%s(%d) rq_err\n", __func__, dev->id); + pr_err("%s(%d) rq_err\n", __func__, dev->id); dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa); sq_err: - ocrdma_err("%s(%d) sq_err\n", __func__, dev->id); + pr_err("%s(%d) sq_err\n", __func__, dev->id); kfree(cmd); return status; } @@ -2113,7 +2113,7 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid, else if (rdma_link_local_addr(&in6)) rdma_get_ll_mac(&in6, mac_addr); else { - ocrdma_err("%s() fail to resolve mac_addr.\n", __func__); + pr_err("%s() fail to resolve mac_addr.\n", __func__); return -EINVAL; } return 0; @@ -2348,8 +2348,8 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq, dev->attr.rqe_size, &hw_pages, &hw_page_size); if (status) { - ocrdma_err("%s() req. max_wr=0x%x\n", __func__, - srq_attr->attr.max_wr); + pr_err("%s() req. max_wr=0x%x\n", __func__, + srq_attr->attr.max_wr); status = -EINVAL; goto ret; } @@ -2600,7 +2600,7 @@ mq_err: ocrdma_destroy_qp_eqs(dev); qpeq_err: ocrdma_destroy_eq(dev, &dev->meq); - ocrdma_err("%s() status=%d\n", __func__, status); + pr_err("%s() status=%d\n", __func__, status); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 48928c8e7774..ded416f1adea 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -378,7 +378,7 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev) spin_lock_init(&dev->flush_q_lock); return 0; alloc_err: - ocrdma_err("%s(%d) error.\n", __func__, dev->id); + pr_err("%s(%d) error.\n", __func__, dev->id); return -ENOMEM; } @@ -396,7 +396,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); if (!dev) { - ocrdma_err("Unable to allocate ib device\n"); + pr_err("Unable to allocate ib device\n"); return NULL; } dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); @@ -437,7 +437,7 @@ init_err: idr_err: kfree(dev->mbx_cmd); ib_dealloc_device(&dev->ibdev); - ocrdma_err("%s() leaving. ret=%d\n", __func__, status); + pr_err("%s() leaving. ret=%d\n", __func__, status); return NULL; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 38c145b28f5c..882a8198d820 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -114,8 +114,8 @@ int ocrdma_query_port(struct ib_device *ibdev, dev = get_ocrdma_dev(ibdev); if (port > 1) { - ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); + pr_err("%s(%d) invalid_port=0x%x\n", __func__, + dev->id, port); return -EINVAL; } netdev = dev->nic_info.netdev; @@ -155,8 +155,7 @@ int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, dev = get_ocrdma_dev(ibdev); if (port > 1) { - ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); + pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); return -EINVAL; } return 0; @@ -442,8 +441,8 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, struct ocrdma_dev *dev = pd->dev; if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { - ocrdma_err("%s(%d) leaving err, invalid access rights\n", - __func__, dev->id); + pr_err("%s(%d) leaving err, invalid access rights\n", + __func__, dev->id); return ERR_PTR(-EINVAL); } @@ -703,8 +702,8 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata, uresp.phase_change = cq->phase_change ? 1 : 0; status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { - ocrdma_err("%s(%d) copy error cqid=0x%x.\n", - __func__, cq->dev->id, cq->id); + pr_err("%s(%d) copy error cqid=0x%x.\n", + __func__, cq->dev->id, cq->id); goto err; } uctx = get_ocrdma_ucontext(ib_ctx); @@ -822,57 +821,56 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, if (attrs->qp_type != IB_QPT_GSI && attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_UD) { - ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n", - __func__, dev->id, attrs->qp_type); + pr_err("%s(%d) unsupported qp type=0x%x requested\n", + __func__, dev->id, attrs->qp_type); return -EINVAL; } if (attrs->cap.max_send_wr > dev->attr.max_wqe) { - ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n", - __func__, dev->id, attrs->cap.max_send_wr); - ocrdma_err("%s(%d) supported send_wr=0x%x\n", - __func__, dev->id, dev->attr.max_wqe); + pr_err("%s(%d) unsupported send_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_wr); + pr_err("%s(%d) supported send_wr=0x%x\n", + __func__, dev->id, dev->attr.max_wqe); return -EINVAL; } if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) { - ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n", - __func__, dev->id, attrs->cap.max_recv_wr); - ocrdma_err("%s(%d) supported recv_wr=0x%x\n", - __func__, dev->id, dev->attr.max_rqe); + pr_err("%s(%d) unsupported recv_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_wr); + pr_err("%s(%d) supported recv_wr=0x%x\n", + __func__, dev->id, dev->attr.max_rqe); return -EINVAL; } if (attrs->cap.max_inline_data > dev->attr.max_inline_data) { - ocrdma_err("%s(%d) unsupported inline data size=0x%x" - " requested\n", __func__, dev->id, - attrs->cap.max_inline_data); - ocrdma_err("%s(%d) supported inline data size=0x%x\n", - __func__, dev->id, dev->attr.max_inline_data); + pr_err("%s(%d) unsupported inline data size=0x%x requested\n", + __func__, dev->id, attrs->cap.max_inline_data); + pr_err("%s(%d) supported inline data size=0x%x\n", + __func__, dev->id, dev->attr.max_inline_data); return -EINVAL; } if (attrs->cap.max_send_sge > dev->attr.max_send_sge) { - ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n", - __func__, dev->id, attrs->cap.max_send_sge); - ocrdma_err("%s(%d) supported send_sge=0x%x\n", - __func__, dev->id, dev->attr.max_send_sge); + pr_err("%s(%d) unsupported send_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_sge); + pr_err("%s(%d) supported send_sge=0x%x\n", + __func__, dev->id, dev->attr.max_send_sge); return -EINVAL; } if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) { - ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n", - __func__, dev->id, attrs->cap.max_recv_sge); - ocrdma_err("%s(%d) supported recv_sge=0x%x\n", - __func__, dev->id, dev->attr.max_recv_sge); + pr_err("%s(%d) unsupported recv_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_sge); + pr_err("%s(%d) supported recv_sge=0x%x\n", + __func__, dev->id, dev->attr.max_recv_sge); return -EINVAL; } /* unprivileged user space cannot create special QP */ if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { - ocrdma_err + pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); return -EINVAL; } /* allow creating only one GSI type of QP */ if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { - ocrdma_err("%s(%d) GSI special QPs already created.\n", - __func__, dev->id); + pr_err("%s(%d) GSI special QPs already created.\n", + __func__, dev->id); return -EINVAL; } /* verify consumer QPs are not trying to use GSI QP's CQ */ @@ -881,8 +879,8 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { - ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", - __func__, dev->id); + pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n", + __func__, dev->id); return -EINVAL; } } @@ -934,7 +932,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, } status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { - ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id); + pr_err("%s(%d) user copy error.\n", __func__, dev->id); goto err; } status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0], @@ -1088,7 +1086,7 @@ mbx_err: kfree(qp->wqe_wr_id_tbl); kfree(qp->rqe_wr_id_tbl); kfree(qp); - ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status); + pr_err("%s(%d) error=%d\n", __func__, dev->id, status); gen_err: return ERR_PTR(status); } @@ -1138,10 +1136,10 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_unlock_irqrestore(&qp->q_lock, flags); if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { - ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for " - "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", - __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, - old_qps, new_qps); + pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" + "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", + __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, + old_qps, new_qps); goto param_err; } @@ -1640,9 +1638,9 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, { if (wr->send_flags & IB_SEND_INLINE) { if (wr->sg_list[0].length > qp->max_inline_data) { - ocrdma_err("%s() supported_len=0x%x," - " unspported len req=0x%x\n", __func__, - qp->max_inline_data, wr->sg_list[0].length); + pr_err("%s() supported_len=0x%x,\n" + " unspported len req=0x%x\n", __func__, + qp->max_inline_data, wr->sg_list[0].length); return -EINVAL; } memcpy(sge, @@ -2057,8 +2055,8 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, break; default: ibwc->status = IB_WC_GENERAL_ERR; - ocrdma_err("%s() invalid opcode received = 0x%x\n", - __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); + pr_err("%s() invalid opcode received = 0x%x\n", + __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); break; }; } -- cgit v1.2.3 From f6ddcf71070d01a7bb34818dd3aaf4bdac5386fa Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 10 Jun 2013 04:42:40 +0000 Subject: RDMA/ocrdma: Set bad_wr in error case Fix post_send to set the bad_wr in error case. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 882a8198d820..06215301a675 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1734,12 +1734,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->q_lock, flags); if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; return -EINVAL; } while (wr) { if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || wr->num_sge > qp->sq.max_sges) { + *bad_wr = wr; status = -ENOMEM; break; } -- cgit v1.2.3 From df176ea0743fd0fb0514c862797f6bd8c08ab42e Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 10 Jun 2013 04:42:41 +0000 Subject: RDMA/ocrdma: Change macros to inline funtions Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 43 ++++++++++++++++++++++++++--- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 15 ---------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 9d82d097e25a..7aa7f0f15f8e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -290,10 +290,6 @@ struct ocrdma_qp { u8 *ird_q_va; }; -#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \ - (((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \ - (qp->id < 64)) ? 24 : 16) - struct ocrdma_hw_mr { struct ocrdma_dev *dev; u32 lkey; @@ -384,4 +380,43 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct ocrdma_srq, ibsrq); } + +static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp) +{ + return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY && + qp->id < 64) ? 24 : 16); +} + +static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) +{ + int cqe_valid; + cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID; + return ((cqe_valid == cq->phase) ? 1 : 0); +} + +static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_QTYPE) ? 0 : 1; +} + +static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_INVALIDATE) ? 1 : 0; +} + +static inline int is_cqe_imm(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_IMM) ? 1 : 0; +} + +static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_WRITE_IMM) ? 1 : 0; +} + + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index cd0512f1fb5b..36b062da2aea 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1534,21 +1534,6 @@ struct ocrdma_cqe { u32 flags_status_srcqpn; /* w3 */ } __packed; -#define is_cqe_valid(cq, cqe) \ - (((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID)\ - == cq->phase) ? 1 : 0) -#define is_cqe_for_sq(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1) -#define is_cqe_for_rq(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0) -#define is_cqe_invalidated(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \ - 1 : 0) -#define is_cqe_imm(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0) -#define is_cqe_wr_imm(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0) - struct ocrdma_sge { u32 addr_hi; u32 addr_lo; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 06215301a675..dcfbab177faa 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1819,7 +1819,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { - u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp)); + u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp)); iowrite32(val, qp->rq_db); } -- cgit v1.2.3 From 9884bcdca30ae9f29a0d6af4a4577826b00c5d94 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 10 Jun 2013 04:42:42 +0000 Subject: RDMA/ocrdma: Reorg structures to avoid padding Reorg structures to better packing to avoid cacheline padding. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 7aa7f0f15f8e..d540180a8e42 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -236,15 +236,16 @@ struct ocrdma_srq { struct ib_srq ibsrq; struct ocrdma_dev *dev; u8 __iomem *db; + struct ocrdma_qp_hwq_info rq; + u64 *rqe_wr_id_tbl; + u32 *idx_bit_fields; + u32 bit_fields_len; + /* provide synchronization to multiple context(s) posting rqe */ spinlock_t q_lock ____cacheline_aligned; - struct ocrdma_qp_hwq_info rq; struct ocrdma_pd *pd; u32 id; - u64 *rqe_wr_id_tbl; - u32 *idx_bit_fields; - u32 bit_fields_len; }; struct ocrdma_qp { @@ -252,8 +253,6 @@ struct ocrdma_qp { struct ocrdma_dev *dev; u8 __iomem *sq_db; - /* provide synchronization to multiple context(s) posting wqe, rqe */ - spinlock_t q_lock ____cacheline_aligned; struct ocrdma_qp_hwq_info sq; struct { uint64_t wrid; @@ -263,6 +262,9 @@ struct ocrdma_qp { uint8_t rsvd[3]; } *wqe_wr_id_tbl; u32 max_inline_data; + + /* provide synchronization to multiple context(s) posting wqe, rqe */ + spinlock_t q_lock ____cacheline_aligned; struct ocrdma_cq *sq_cq; /* list maintained per CQ to flush SQ errors */ struct list_head sq_entry; -- cgit v1.2.3 From 8469ba39a6b77917e8879680aed17229bf72f263 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 30 May 2013 18:25:25 -0400 Subject: IB/qib: Add DCA support This patch adds DCA cache warming for systems that support DCA. The code uses cpu affinity notification to react to an affinity change from a user mode program like irqbalance and (re-)program the chip accordingly. This notification avoids reading the current cpu on every interrupt. Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn [ Add Kconfig dependency on SMP && GENERIC_HARDIRQS to avoid failure to build due to undefined struct irq_affinity_notify. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/Kconfig | 8 + drivers/infiniband/hw/qib/qib.h | 13 ++ drivers/infiniband/hw/qib/qib_iba6120.c | 10 + drivers/infiniband/hw/qib/qib_iba7220.c | 10 + drivers/infiniband/hw/qib/qib_iba7322.c | 334 ++++++++++++++++++++++++++++++-- drivers/infiniband/hw/qib/qib_init.c | 41 ++++ 6 files changed, 404 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 1e603a375069..d03ca4c1ff25 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -5,3 +5,11 @@ config INFINIBAND_QIB This is a low-level driver for Intel PCIe QLE InfiniBand host channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). + +config INFINIBAND_QIB_DCA + bool "QIB DCA support" + depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m) + default y + ---help--- + Setting this enables DCA support on some Intel chip sets + with the iba7322 HCA. diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 4d11575c2010..cecbd43f9212 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -428,9 +428,19 @@ struct qib_verbs_txreq { #define ACTIVITY_TIMER 5 #define MAX_NAME_SIZE 64 + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify; +#endif + struct qib_msix_entry { struct msix_entry msix; void *arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca; + int rcv; + struct qib_irq_notify *notifier; +#endif char name[MAX_NAME_SIZE]; cpumask_var_t mask; }; @@ -828,6 +838,9 @@ struct qib_devdata { struct qib_ctxtdata *); void (*f_writescratch)(struct qib_devdata *, u32); int (*f_tempsense_rd)(struct qib_devdata *, int regnum); +#ifdef CONFIG_INFINIBAND_QIB_DCA + int (*f_notify_dca)(struct qib_devdata *, unsigned long event); +#endif char *boardname; /* human readable board info */ diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 0232ae56b1fa..84e593d6007b 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3464,6 +3464,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) return -ENXIO; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 6120 boards never disable EEPROM Write */ static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -3539,6 +3546,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_6120_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_6120_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_6120_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 64d0ecb90cdc..454c2e7668fe 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4513,6 +4513,13 @@ bail: return ret; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 7220 boards never disable EEPROM Write */ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_7220_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7220_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7220_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 3f6b21e9dc11..46ffea033be0 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -44,6 +44,9 @@ #include #include #include +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include +#endif #include "qib.h" #include "qib_7322_regs.h" @@ -519,6 +522,14 @@ static const u8 qib_7322_physportstate[0x20] = { [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify { + int rcv; + void *arg; + struct irq_affinity_notify notify; +}; +#endif + struct qib_chip_specific { u64 __iomem *cregbase; u64 *cntrs; @@ -546,6 +557,12 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 stay_in_freeze; u32 recovery_ports_initted; +#ifdef CONFIG_INFINIBAND_QIB_DCA + u32 dca_ctrl; + int rhdr_cpu[18]; + int sdma_cpu[2]; + u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */ +#endif struct qib_msix_entry *msix_entries; unsigned long *sendchkenable; unsigned long *sendgrhchk; @@ -642,28 +659,76 @@ static struct { irq_handler_t handler; int lsb; int port; /* 0 if not port-specific, else port # */ + int dca; } irq_table[] = { - { "", qib_7322intr, -1, 0 }, + { "", qib_7322intr, -1, 0, 0 }, { " (buf avail)", qib_7322bufavail, - SYM_LSB(IntStatus, SendBufAvail), 0 }, + SYM_LSB(IntStatus, SendBufAvail), 0, 0}, { " (sdma 0)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_0), 1 }, + SYM_LSB(IntStatus, SDmaInt_0), 1, 1 }, { " (sdma 1)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_1), 2 }, + SYM_LSB(IntStatus, SDmaInt_1), 2, 1 }, { " (sdmaI 0)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, + SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1}, { " (sdmaI 1)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, + SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1}, { " (sdmaP 0)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, + SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 }, { " (sdmaP 1)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, + SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 }, { " (sdmaC 0)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, + SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 }, { " (sdmaC 1)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, + SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0}, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static const struct dca_reg_map { + int shadow_inx; + int lsb; + u64 mask; + u16 regno; +} dca_rcvhdr_reg_map[] = { + { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) }, +}; +#endif + /* ibcctrl bits */ #define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 /* cycle through TS1/TS2 till OK */ @@ -686,6 +751,13 @@ static void write_7322_init_portregs(struct qib_pportdata *); static void setup_7322_link_recovery(struct qib_pportdata *, u32); static void check_7322_rxe_status(struct qib_pportdata *); static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); +#ifdef CONFIG_INFINIBAND_QIB_DCA +static void qib_setup_dca(struct qib_devdata *dd); +static void setup_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +static void reset_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +#endif /** * qib_read_ureg32 - read 32-bit virtualized per-context register @@ -2558,6 +2630,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + switch (event) { + case DCA_PROVIDER_ADD: + if (dd->flags & QIB_DCA_ENABLED) + break; + if (!dca_add_requester(&dd->pcidev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } + break; + case DCA_PROVIDER_REMOVE: + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), + dd->cspec->dca_ctrl); + } + break; + } + return 0; +} + +static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_chip_specific *cspec = dd->cspec; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->rhdr_cpu[rcd->ctxt] != cpu) { + const struct dca_reg_map *rmp; + + cspec->rhdr_cpu[rcd->ctxt] = cpu; + rmp = &dca_rcvhdr_reg_map[rcd->ctxt]; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb; + qib_devinfo(dd->pcidev, + "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu, + (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + qib_write_kreg(dd, rmp->regno, + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_chip_specific *cspec = dd->cspec; + unsigned pidx = ppd->port - 1; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->sdma_cpu[pidx] != cpu) { + cspec->sdma_cpu[pidx] = cpu; + cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ? + SYM_MASK(DCACtrlF, SendDma1DCAOPH) : + SYM_MASK(DCACtrlF, SendDma0DCAOPH)); + cspec->dca_rcvhdr_ctrl[4] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << + (ppd->hw_pidx ? + SYM_LSB(DCACtrlF, SendDma1DCAOPH) : + SYM_LSB(DCACtrlF, SendDma0DCAOPH)); + qib_devinfo(dd->pcidev, + "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu, + (long long) cspec->dca_rcvhdr_ctrl[4]); + qib_write_kreg(dd, KREG_IDX(DCACtrlF), + cspec->dca_rcvhdr_ctrl[4]); + cspec->dca_ctrl |= ppd->hw_pidx ? + SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) : + SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_setup_dca(struct qib_devdata *dd) +{ + struct qib_chip_specific *cspec = dd->cspec; + int i; + + for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++) + cspec->rhdr_cpu[i] = -1; + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + cspec->sdma_cpu[i] = -1; + cspec->dca_rcvhdr_ctrl[0] = + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[1] = + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[2] = + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[3] = + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[4] = + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt)); + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, + cspec->dca_rcvhdr_ctrl[i]); + for (i = 0; i < cspec->num_msix_entries; i++) + setup_dca_notifier(dd, &cspec->msix_entries[i]); +} + +static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct qib_irq_notify *n = + container_of(notify, struct qib_irq_notify, notify); + int cpu = cpumask_first(mask); + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + qib_update_rhdrq_dca(rcd, cpu); + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + qib_update_sdma_dca(ppd, cpu); + } +} + +static void qib_irq_notifier_release(struct kref *ref) +{ + struct qib_irq_notify *n = + container_of(ref, struct qib_irq_notify, notify.kref); + struct qib_devdata *dd; + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + dd = rcd->dd; + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + dd = ppd->dd; + } + qib_devinfo(dd->pcidev, + "release on HCA notify 0x%p n 0x%p\n", ref, n); + kfree(n); +} +#endif + /* * Disable MSIx interrupt if enabled, call generic MSIx code * to cleanup, and clear pending MSIx interrupts. @@ -2575,6 +2803,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd) dd->cspec->num_msix_entries = 0; for (i = 0; i < n; i++) { +#ifdef CONFIG_INFINIBAND_QIB_DCA + reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); +#endif irq_set_affinity_hint( dd->cspec->msix_entries[i].msix.vector, NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); @@ -2602,6 +2833,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) { int i; +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl); + } +#endif + qib_7322_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->sendchkenable); @@ -3068,6 +3308,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + if (!m->dca) + return; + qib_devinfo(dd->pcidev, + "Disabling notifier on HCA %d irq %d\n", + dd->unit, + m->msix.vector); + irq_set_affinity_notifier( + m->msix.vector, + NULL); + m->notifier = NULL; +} + +static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + struct qib_irq_notify *n; + + if (!m->dca) + return; + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (n) { + int ret; + + m->notifier = n; + n->notify.irq = m->msix.vector; + n->notify.notify = qib_irq_notifier_notify; + n->notify.release = qib_irq_notifier_release; + n->arg = m->arg; + n->rcv = m->rcv; + qib_devinfo(dd->pcidev, + "set notifier irq %d rcv %d notify %p\n", + n->notify.irq, n->rcv, &n->notify); + ret = irq_set_affinity_notifier( + n->notify.irq, + &n->notify); + if (ret) { + m->notifier = NULL; + kfree(n); + } + } +} + +#endif + /* * Set up our chip-specific interrupt handler. * The interrupt type has already been setup, so @@ -3149,6 +3436,9 @@ try_intx: void *arg; u64 val; int lsb, reg, sh; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca = 0; +#endif dd->cspec->msix_entries[msixnum]. name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] @@ -3161,6 +3451,9 @@ try_intx: arg = dd->pport + irq_table[i].port - 1; } else arg = dd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = irq_table[i].dca; +#endif lsb = irq_table[i].lsb; handler = irq_table[i].handler; snprintf(dd->cspec->msix_entries[msixnum].name, @@ -3178,6 +3471,9 @@ try_intx: continue; if (qib_krcvq01_no_msi && ctxt < 2) continue; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = 1; +#endif lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; snprintf(dd->cspec->msix_entries[msixnum].name, @@ -3203,6 +3499,11 @@ try_intx: goto try_intx; } dd->cspec->msix_entries[msixnum].arg = arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->cspec->msix_entries[msixnum].dca = dca; + dd->cspec->msix_entries[msixnum].rcv = + handler == qib_7322pintr; +#endif if (lsb >= 0) { reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * @@ -6885,6 +7186,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->f_sdma_init_early = qib_7322_sdma_init_early; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7322_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7322_notify_dca; +#endif /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code. @@ -6921,7 +7225,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, actual_cnt -= dd->num_pports; tabsize = actual_cnt; - dd->cspec->msix_entries = kmalloc(tabsize * + dd->cspec->msix_entries = kzalloc(tabsize * sizeof(struct qib_msix_entry), GFP_KERNEL); if (!dd->cspec->msix_entries) { qib_dev_err(dd, "No memory for MSIx table\n"); @@ -6941,7 +7245,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, /* clear diagctrl register, in case diags were running and crashed */ qib_write_kreg(dd, kr_hwdiagctrl, 0); - +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (!dca_add_requester(&pdev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } +#endif goto bail; bail_cleanup: diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 173f805790da..4b64c885fa0d 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -39,6 +39,9 @@ #include #include #include +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include +#endif #include "qib.h" #include "qib_common.h" @@ -1158,6 +1161,35 @@ struct pci_driver qib_driver = { .err_handler = &qib_pci_err_handler, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = qib_notify_dca, + .next = NULL, + .priority = 0 +}; + +static int qib_notify_dca_device(struct device *device, void *data) +{ + struct qib_devdata *dd = dev_get_drvdata(device); + unsigned long event = *(unsigned long *)data; + + return dd->f_notify_dca(dd, event); +} + +static int qib_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int rval; + + rval = driver_for_each_device(&qib_driver.driver, NULL, + &event, qib_notify_dca_device); + return rval ? NOTIFY_BAD : NOTIFY_DONE; +} + +#endif + /* * Do all the generic driver unit- and chip-independent memory * allocation and initialization. @@ -1182,6 +1214,9 @@ static int __init qlogic_ib_init(void) */ idr_init(&qib_unit_table); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_register_notify(&dca_notifier); +#endif ret = pci_register_driver(&qib_driver); if (ret < 0) { pr_err("Unable to register driver: error %d\n", -ret); @@ -1194,6 +1229,9 @@ static int __init qlogic_ib_init(void) goto bail; /* all OK */ bail_unit: +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif idr_destroy(&qib_unit_table); destroy_workqueue(qib_cq_wq); bail_dev: @@ -1217,6 +1255,9 @@ static void __exit qlogic_ib_cleanup(void) "Unable to cleanup counter filesystem: error %d\n", -ret); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&qib_driver); destroy_workqueue(qib_cq_wq); -- cgit v1.2.3 From f7cf9a618b48212394c07b169864d20beb23b8e5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 15 Jun 2013 17:06:58 -0400 Subject: IB/qib: Remove atomic_inc_not_zero() from QP RCU Follow Documentation/RCU/rcuref.txt guidance in removing atomic_inc_not_zero() from QP RCU implementation. This patch also removes an unneeded synchronize_rcu() in the add path. Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_qp.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index a6a2cc2ba260..c1f573a331c7 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -222,8 +222,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) unsigned long flags; unsigned n = qpn_hash(dev, qp->ibqp.qp_num); - spin_lock_irqsave(&dev->qpt_lock, flags); atomic_inc(&qp->refcount); + spin_lock_irqsave(&dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) rcu_assign_pointer(ibp->qp0, qp); @@ -235,7 +235,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); } /* @@ -247,36 +246,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; + int removed = 1; spin_lock_irqsave(&dev->qpt_lock, flags); if (rcu_dereference_protected(ibp->qp0, lockdep_is_held(&dev->qpt_lock)) == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp0, NULL); } else if (rcu_dereference_protected(ibp->qp1, lockdep_is_held(&dev->qpt_lock)) == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp1, NULL); } else { struct qib_qp *q; struct qib_qp __rcu **qpp; + removed = 0; qpp = &dev->qp_table[n]; for (; (q = rcu_dereference_protected(*qpp, lockdep_is_held(&dev->qpt_lock))) != NULL; qpp = &q->next) if (q == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(*qpp, rcu_dereference_protected(qp->next, lockdep_is_held(&dev->qpt_lock))); + removed = 1; break; } } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); + if (removed) { + synchronize_rcu(); + atomic_dec(&qp->refcount); + } } /** @@ -334,26 +336,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { struct qib_qp *qp = NULL; + rcu_read_lock(); if (unlikely(qpn <= 1)) { - rcu_read_lock(); if (qpn == 0) qp = rcu_dereference(ibp->qp0); else qp = rcu_dereference(ibp->qp1); + if (qp) + atomic_inc(&qp->refcount); } else { struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; unsigned n = qpn_hash(dev, qpn); - rcu_read_lock(); for (qp = rcu_dereference(dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); break; + } } - if (qp) - if (unlikely(!atomic_inc_not_zero(&qp->refcount))) - qp = NULL; - rcu_read_unlock(); return qp; } -- cgit v1.2.3 From ab4a13d69bf01b098906c60e7598d10752401a56 Mon Sep 17 00:00:00 2001 From: Vinit Agnihotri Date: Sat, 15 Jun 2013 17:11:38 -0400 Subject: IB/qib: Update minor version number External PSM repositories have advanced the minor number for a variety of reasons. The driver needs to increase to avoid warnings. Signed-off-by: Vinit Agnihotri Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index d39e0183ff82..4f255b723ffd 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,7 +279,7 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define QIB_USER_SWMINOR 11 +#define QIB_USER_SWMINOR 12 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) -- cgit v1.2.3 From e0f30baca1ebe5547f6760f760b8c4e189fc1203 Mon Sep 17 00:00:00 2001 From: Ramkrishna Vepa Date: Tue, 28 May 2013 12:57:33 -0400 Subject: IB/qib: Add optional NUMA affinity This patch adds context relative numa affinity conditioned on the module parameter numa_aware. The qib_ctxtdata has an additional node_id member and qib_create_ctxtdata() has an addition node_id parameter. The allocations within the hdr queue and eager queue setup routines now take this additional member and adjust allocations as necesary. PSM will pass the either current numa node or the node closest to the HCA depending on numa_aware. Verbs will always use the node closest to the HCA. Reviewed-by: Dean Luick Signed-off-by: Ramkrishna Vepa Signed-off-by: Vinit Agnihotri Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 7 +++++- drivers/infiniband/hw/qib/qib_file_ops.c | 6 ++++- drivers/infiniband/hw/qib/qib_init.c | 39 ++++++++++++++++++++++++++------ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index cecbd43f9212..2ee82e6550c7 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -154,6 +154,8 @@ struct qib_ctxtdata { */ /* instead of calculating it */ unsigned ctxt; + /* local node of context */ + int node_id; /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. */ @@ -1088,6 +1090,8 @@ struct qib_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors defered to tasklet */ struct tasklet_struct error_tasklet; + + int assigned_node_id; /* NUMA node closest to HCA */ }; /* hol_state values */ @@ -1167,7 +1171,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *); int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); @@ -1458,6 +1462,7 @@ extern unsigned qib_n_krcv_queues; extern unsigned qib_sdma_fetch_arb; extern unsigned qib_compat_ddr_negotiate; extern int qib_special_trigger; +extern unsigned qib_numa_aware; extern struct mutex qib_mutex; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index b56c9428f3c5..65b2fc3f957c 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1263,8 +1263,12 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; + int numa_id; - rcd = qib_create_ctxtdata(ppd, ctxt); + numa_id = qib_numa_aware ? numa_node_id() : + dd->assigned_node_id; + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); /* * Allocate memory for use in qib_tid_update() at open to diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 4b64c885fa0d..e02217b5c46d 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -67,6 +67,11 @@ ushort qib_cfgctxts; module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); +unsigned qib_numa_aware; +module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); +MODULE_PARM_DESC(numa_aware, + "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); + /* * If set, do not write to any regs if avoidable, hack to allow * check for deranged default register values. @@ -124,6 +129,11 @@ int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; int ret; + int local_node_id = pcibus_to_node(dd->pcidev->bus); + + if (local_node_id < 0) + local_node_id = numa_node_id(); + dd->assigned_node_id = local_node_id; /* * Allocate full ctxtcnt array, rather than just cfgctxts, because @@ -146,7 +156,8 @@ int qib_create_ctxts(struct qib_devdata *dd) continue; ppd = dd->pport + (i % dd->num_pports); - rcd = qib_create_ctxtdata(ppd, i); + + rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); if (!rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); @@ -164,14 +175,16 @@ done: /* * Common code for user and kernel context setup. */ -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, + int node_id) { struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); if (rcd) { INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->node_id = node_id; rcd->ppd = ppd; rcd->dd = dd; rcd->cnt = 1; @@ -1524,6 +1537,7 @@ static void qib_remove_one(struct pci_dev *pdev) int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) { unsigned amt; + int old_node_id; if (!rcd->rcvhdrq) { dma_addr_t phys_hdrqtail; @@ -1533,9 +1547,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) sizeof(u32), PAGE_SIZE); gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrq = dma_alloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, gfp_flags | __GFP_COMP); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrq) { qib_dev_err(dd, @@ -1551,9 +1569,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) } if (!(dd->flags & QIB_NODMA_RTAIL)) { + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1597,6 +1617,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; gfp_t gfp_flags; + int old_node_id; /* * GFP_USER, but without GFP_FS, so buffer cache can be @@ -1615,25 +1636,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) size = rcd->rcvegrbuf_size; if (!rcd->rcvegrbuf) { rcd->rcvegrbuf = - kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), - GFP_KERNEL); + kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf) goto bail; } if (!rcd->rcvegrbuf_phys) { rcd->rcvegrbuf_phys = - kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), - GFP_KERNEL); + kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf_phys) goto bail_rcvegrbuf; } for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { if (rcd->rcvegrbuf[e]) continue; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvegrbuf[e] = dma_alloc_coherent(&dd->pcidev->dev, size, &rcd->rcvegrbuf_phys[e], gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvegrbuf[e]) goto bail_rcvegrbuf_phys; } -- cgit v1.2.3 From c804f07248895ff9c9dccb6cda703068a0657b6c Mon Sep 17 00:00:00 2001 From: Ramkrishna Vepa Date: Sun, 2 Jun 2013 15:16:11 -0400 Subject: IB/qib: Add dual-rail NUMA awareness for PSM processes The driver currently selects a HCA based on the algorithm that PSM chooses, contexts within a HCA or across. The HCA can also be chosen by the user. Either way, this patch assigns a CPU on the NUMA node local to the selected HCA. This patch also tries to select the HCA closest to the NUMA node of the CPU assigned via taskset to PSM process. If this HCA is unusable then another unit is selected based on the algorithm that is currently enforced or selected by PSM - round robin context selection 'within' or 'across' HCA's. Fixed a bug wherein contexts are setup on the NUMA node on which the processes are opened (setup_ctxt()) and not on the NUMA node that the driver recommends the CPU on. Reviewed-by: Mike Marciniszyn Signed-off-by: Vinit Agnihotri Signed-off-by: Ramkrishna Vepa Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_file_ops.c | 174 ++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 65b2fc3f957c..df3808a38381 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) return pollflag; } +static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) +{ + struct qib_filedata *fd = fp->private_data; + const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); + int local_cpu; + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it on the local NUMA node. + */ + if ((weight >= qib_cpulist_count) && + (cpumask_weight(local_mask) <= qib_cpulist_count)) { + for_each_cpu(local_cpu, local_mask) + if (!test_and_set_bit(local_cpu, qib_cpulist)) { + fd->rec_cpu_num = local_cpu; + return; + } + } + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it, as a rendevous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do it's best. + */ + if (weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu == qib_cpulist_count) + qib_dev_err(dd, + "no cpus avail for affinity PID %u\n", + current->pid); + else { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } +} + /* * Check that userland and driver are compatible for subcontexts. */ @@ -1259,14 +1302,18 @@ bail: static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct file *fp, const struct qib_user_info *uinfo) { + struct qib_filedata *fd = fp->private_data; struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; int numa_id; - numa_id = qib_numa_aware ? numa_node_id() : - dd->assigned_node_id; + assign_ctxt_affinity(fp, dd); + + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ? + cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); @@ -1300,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, goto bail; bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + dd->rcd[ctxt] = NULL; kfree(rcd); kfree(ptmp); @@ -1489,6 +1539,57 @@ static int qib_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static int find_hca(unsigned int cpu, int *unit) +{ + int ret = 0, devmax, npresent, nup, ndev; + + *unit = -1; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (!nup) { + ret = -ENETDOWN; + goto done; + } + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + if (pcibus_to_node(dd->pcidev->bus) < 0) { + ret = -EINVAL; + goto done; + } + if (cpu_to_node(cpu) == + pcibus_to_node(dd->pcidev->bus)) { + *unit = ndev; + goto done; + } + } + } +done: + return ret; +} + +static int do_qib_user_sdma_queue_create(struct file *fp) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + + if (dd->flags & QIB_HAS_SEND_DMA) + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + return -ENOMEM; + + return 0; +} + /* * Get ctxt early, so can set affinity prior to memory allocation. */ @@ -1521,61 +1622,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (qib_compatible_subctxts(swmajor, swminor) && uinfo->spu_subctxt_cnt) { ret = find_shared_ctxt(fp, uinfo); - if (ret) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; + if (ret > 0) { + ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; } } i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); - else + else { + int unit; + const unsigned int cpu = cpumask_first(¤t->cpus_allowed); + const unsigned int weight = + cpumask_weight(¤t->cpus_allowed); + + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { + ret = 0; + goto done_chk_sdma; + } ret = get_a_ctxt(fp, uinfo, alg); - -done_chk_sdma: - if (!ret) { - struct qib_filedata *fd = fp->private_data; - const struct qib_ctxtdata *rcd = fd->rcd; - const struct qib_devdata *dd = rcd->dd; - unsigned int weight; - - if (dd->flags & QIB_HAS_SEND_DMA) { - fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, - dd->unit, - rcd->ctxt, - fd->subctxt); - if (!fd->pq) - ret = -ENOMEM; - } - - /* - * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendezvous for all - * users of the driver. If they don't actually later - * set affinity to this cpu, or set it to some other cpu, - * it just means that sooner or later we don't recommend - * a cpu, and let the scheduler do it's best. - */ - weight = cpumask_weight(tsk_cpus_allowed(current)); - if (!ret && weight >= qib_cpulist_count) { - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); - if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } - } else if (weight == 1 && - test_bit(cpumask_first(tsk_cpus_allowed(current)), - qib_cpulist)) - qib_devinfo(dd->pcidev, - "%s PID %u affinity set to cpu %d; already allocated\n", - current->comm, current->pid, - cpumask_first(tsk_cpus_allowed(current))); } +done_chk_sdma: + if (!ret) + ret = do_qib_user_sdma_queue_create(fp); +done_ok: mutex_unlock(&qib_mutex); done: -- cgit v1.2.3 From 85caafe307a06e4f9993c8f3c994a07374c07831 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 4 Jun 2013 15:05:37 -0400 Subject: IB/qib: Optimize CQ callbacks The current workqueue implemention has the following performance deficiencies on QDR HCAs: - The CQ call backs tend to run on the CPUs processing the receive queues - The single thread queue isn't optimal for multiple HCAs This patch adds a dedicated per HCA bound thread to process CQ callbacks. Reviewed-by: Ramkrishna Vepa Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 5 ++- drivers/infiniband/hw/qib/qib_cq.c | 67 +++++++++++++++++++++++++++++++---- drivers/infiniband/hw/qib/qib_init.c | 18 +++------- drivers/infiniband/hw/qib/qib_verbs.h | 10 ++++-- 4 files changed, 76 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 2ee82e6550c7..3a78b92cbd4e 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,7 +1,7 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -51,6 +51,7 @@ #include #include #include +#include #include "qib_common.h" #include "qib_verbs.h" @@ -1090,6 +1091,8 @@ struct qib_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors defered to tasklet */ struct tasklet_struct error_tasklet; + /* per device cq worker */ + struct kthread_worker *worker; int assigned_node_id; /* NUMA node closest to HCA */ }; diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index 5246aa486bbe..ab4e11cfab15 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -34,8 +35,10 @@ #include #include #include +#include #include "qib_verbs.h" +#include "qib.h" /** * qib_cq_enter - add a new entry to the completion queue @@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - cq->notify = IB_CQ_NONE; - cq->triggered++; + struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - queue_work(qib_cq_wq, &cq->comptask); + smp_rmb(); + worker = cq->dd->worker; + if (likely(worker)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } } spin_unlock_irqrestore(&cq->lock, flags); @@ -163,7 +171,7 @@ bail: return npolled; } -static void send_complete(struct work_struct *work) +static void send_complete(struct kthread_work *work) { struct qib_cq *cq = container_of(work, struct qib_cq, comptask); @@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, * The number of entries should be >= the number requested or return * an error. */ + cq->dd = dd_from_dev(dev); cq->ibcq.cqe = entries; cq->notify = IB_CQ_NONE; cq->triggered = 0; spin_lock_init(&cq->lock); - INIT_WORK(&cq->comptask, send_complete); + init_kthread_work(&cq->comptask, send_complete); wc->head = 0; wc->tail = 0; cq->queue = wc; @@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq) struct qib_ibdev *dev = to_idev(ibcq->device); struct qib_cq *cq = to_icq(ibcq); - flush_work(&cq->comptask); + flush_kthread_work(&cq->comptask); spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); @@ -483,3 +492,49 @@ bail_free: bail: return ret; } + +int qib_cq_init(struct qib_devdata *dd) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (dd->worker) + return 0; + dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); + if (!dd->worker) + return -ENOMEM; + init_kthread_worker(dd->worker); + task = kthread_create_on_node( + kthread_worker_fn, + dd->worker, + dd->assigned_node_id, + "qib_cq%d", dd->unit); + if (IS_ERR(task)) + goto task_fail; + cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); + kthread_bind(task, cpu); + wake_up_process(task); +out: + return ret; +task_fail: + ret = PTR_ERR(task); + kfree(dd->worker); + dd->worker = NULL; + goto out; +} + +void qib_cq_exit(struct qib_devdata *dd) +{ + struct kthread_worker *worker; + + worker = dd->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + dd->worker = NULL; + smp_wmb(); + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); +} diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index e02217b5c46d..ff36903474ea 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -97,8 +97,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); -struct workqueue_struct *qib_cq_wq; - static void verify_interrupt(unsigned long); static struct idr qib_unit_table; @@ -445,6 +443,7 @@ static int loadtime_init(struct qib_devdata *dd) dd->intrchk_timer.function = verify_interrupt; dd->intrchk_timer.data = (unsigned long) dd; + ret = qib_cq_init(dd); done: return ret; } @@ -1215,12 +1214,6 @@ static int __init qlogic_ib_init(void) if (ret) goto bail; - qib_cq_wq = create_singlethread_workqueue("qib_cq"); - if (!qib_cq_wq) { - ret = -ENOMEM; - goto bail_dev; - } - /* * These must be called before the driver is registered with * the PCI subsystem. @@ -1233,7 +1226,7 @@ static int __init qlogic_ib_init(void) ret = pci_register_driver(&qib_driver); if (ret < 0) { pr_err("Unable to register driver: error %d\n", -ret); - goto bail_unit; + goto bail_dev; } /* not fatal if it doesn't work */ @@ -1241,13 +1234,11 @@ static int __init qlogic_ib_init(void) pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ -bail_unit: +bail_dev: #ifdef CONFIG_INFINIBAND_QIB_DCA dca_unregister_notify(&dca_notifier); #endif idr_destroy(&qib_unit_table); - destroy_workqueue(qib_cq_wq); -bail_dev: qib_dev_cleanup(); bail: return ret; @@ -1273,8 +1264,6 @@ static void __exit qlogic_ib_cleanup(void) #endif pci_unregister_driver(&qib_driver); - destroy_workqueue(qib_cq_wq); - qib_cpulist_count = 0; kfree(qib_cpulist); @@ -1365,6 +1354,7 @@ static void cleanup_device_data(struct qib_devdata *dd) } kfree(tmp); kfree(dd->boardname); + qib_cq_exit(dd); } /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index aff8b2c17886..86c2cb3c9caf 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -267,7 +268,8 @@ struct qib_cq_wc { */ struct qib_cq { struct ib_cq ibcq; - struct work_struct comptask; + struct kthread_work comptask; + struct qib_devdata *dd; spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; @@ -832,8 +834,6 @@ static inline int qib_send_ok(struct qib_qp *qp) !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); } -extern struct workqueue_struct *qib_cq_wq; - /* * This must be called with s_lock held. */ @@ -972,6 +972,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int qib_destroy_srq(struct ib_srq *ibsrq); +int qib_cq_init(struct qib_devdata *dd); + +void qib_cq_exit(struct qib_devdata *dd); + void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -- cgit v1.2.3 From ddb8876589702a9396d15d9d4075e6388d0600cf Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 15 Jun 2013 17:07:03 -0400 Subject: IB/qib: Convert opcode counters to per-context This fix changes the opcode relative counters for receive to per context. Profiling has shown that when mulitple contexts are being used there is a lot of cache activity associated with these counters. The code formerly kept these counters per port, but only provided the interface to read per HCA. This patch converts the read of counters to per HCA and adds the debugfs hooks to be able to read the file as a sequence of opcodes. Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/Makefile | 1 + drivers/infiniband/hw/qib/qib.h | 36 ++++--- drivers/infiniband/hw/qib/qib_debugfs.c | 166 ++++++++++++++++++++++++++++++++ drivers/infiniband/hw/qib/qib_debugfs.h | 45 +++++++++ drivers/infiniband/hw/qib/qib_driver.c | 1 - drivers/infiniband/hw/qib/qib_init.c | 41 +++++++- drivers/infiniband/hw/qib/qib_verbs.c | 8 +- drivers/infiniband/hw/qib/qib_verbs.h | 9 +- 8 files changed, 284 insertions(+), 23 deletions(-) create mode 100644 drivers/infiniband/hw/qib/qib_debugfs.c create mode 100644 drivers/infiniband/hw/qib/qib_debugfs.h diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index f12d7bb8b39f..57f8103e51f8 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o +ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 3a78b92cbd4e..5453e2b36567 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -115,6 +115,11 @@ struct qib_eep_log_mask { /* * Below contains all data related to a single context (formerly called port). */ + +#ifdef CONFIG_DEBUG_FS +struct qib_opcode_stats_perctx; +#endif + struct qib_ctxtdata { void **rcvegrbuf; dma_addr_t *rcvegrbuf_phys; @@ -225,12 +230,15 @@ struct qib_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* lookaside fields */ struct qib_qp *lookaside_qp; u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; +#ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +#endif }; struct qib_sge_state; @@ -1495,27 +1503,23 @@ extern struct mutex qib_mutex; * first to avoid possible serial port delays from printk. */ #define qib_early_err(dev, fmt, ...) \ - do { \ - dev_err(dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_err(dev, fmt, ##__VA_ARGS__) #define qib_dev_err(dd, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ - qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) #define qib_dev_porterr(dd, port, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) #define qib_devinfo(pcidev, fmt, ...) \ - do { \ - dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c new file mode 100644 index 000000000000..47d01164cc91 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -0,0 +1,166 @@ +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include + +#include "qib.h" +#include "qib_verbs.h" +#include "qib_debugfs.h" + +static struct dentry *qib_dbg_root; + +#define DEBUGFS_FILE(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ +}; \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ +} \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = seq_release \ +}; + +#define DEBUGFS_FILE_CREATE(name) \ +do { \ + struct dentry *ent; \ + ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ + ibd, &_##name##_file_ops); \ + if (!ent) \ + pr_warn("create of " #name " failed\n"); \ +} while (0) + +static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + + +static void _opcode_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu\n", i, + (unsigned long long) n_packets, + (unsigned long long) n_bytes); + + return 0; +} + +DEBUGFS_FILE(opcode_stats) + +void qib_dbg_ibdev_init(struct qib_ibdev *ibd) +{ + char name[10]; + + snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); + ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); + if (!ibd->qib_ibdev_dbg) { + pr_warn("create of %s failed\n", name); + return; + } + DEBUGFS_FILE_CREATE(opcode_stats); + return; +} + +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) +{ + if (!qib_dbg_root) + goto out; + debugfs_remove_recursive(ibd->qib_ibdev_dbg); +out: + ibd->qib_ibdev_dbg = NULL; +} + +void qib_dbg_init(void) +{ + qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); + if (!qib_dbg_root) + pr_warn("init of debugfs failed\n"); +} + +void qib_dbg_exit(void) +{ + debugfs_remove_recursive(qib_dbg_root); + qib_dbg_root = NULL; +} + +#endif + diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h new file mode 100644 index 000000000000..7ae983a91b8b --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.h @@ -0,0 +1,45 @@ +#ifndef _QIB_DEBUGFS_H +#define _QIB_DEBUGFS_H + +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +struct qib_ibdev; +void qib_dbg_ibdev_init(struct qib_ibdev *ibd); +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd); +void qib_dbg_init(void); +void qib_dbg_exit(void); + +#endif + +#endif /* _QIB_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 216092477dfc..5bee08f16d74 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -558,7 +558,6 @@ move_along: } rcd->head = l; - rcd->pkt_count += i; /* * Iterate over all QPs waiting to respond. diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index ff36903474ea..fdae42973056 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -46,6 +46,10 @@ #include "qib.h" #include "qib_common.h" #include "qib_mad.h" +#ifdef CONFIG_DEBUG_FS +#include "qib_debugfs.h" +#include "qib_verbs.h" +#endif #undef pr_fmt #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt @@ -188,7 +192,18 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, rcd->cnt = 1; rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; - +#ifdef CONFIG_DEBUG_FS + if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, node_id); + if (!rcd->opstats) { + kfree(rcd); + qib_dev_err(dd, + "Unable to allocate per ctxt stats buffer\n"); + return NULL; + } + } +#endif dd->f_init_ctxt(rcd); /* @@ -959,6 +974,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); +#ifdef CONFIG_DEBUG_FS + kfree(rcd->opstats); + rcd->opstats = NULL; +#endif kfree(rcd); } @@ -1048,7 +1067,6 @@ done: dd->f_set_armlaunch(dd, 1); } - void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; @@ -1058,6 +1076,9 @@ void qib_free_devdata(struct qib_devdata *dd) list_del(&dd->list); spin_unlock_irqrestore(&qib_devs_lock, flags); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif ib_dealloc_device(&dd->verbs_dev.ibdev); } @@ -1081,6 +1102,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); @@ -1096,6 +1121,9 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif ib_dealloc_device(&dd->verbs_dev.ibdev); dd = ERR_PTR(ret); goto bail; @@ -1222,6 +1250,9 @@ static int __init qlogic_ib_init(void) #ifdef CONFIG_INFINIBAND_QIB_DCA dca_register_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_init(); #endif ret = pci_register_driver(&qib_driver); if (ret < 0) { @@ -1237,6 +1268,9 @@ static int __init qlogic_ib_init(void) bail_dev: #ifdef CONFIG_INFINIBAND_QIB_DCA dca_unregister_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); #endif idr_destroy(&qib_unit_table); qib_dev_cleanup(); @@ -1263,6 +1297,9 @@ static void __exit qlogic_ib_cleanup(void) dca_unregister_notify(&dca_notifier); #endif pci_unregister_driver(&qib_driver); +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif qib_cpulist_count = 0; kfree(qib_cpulist); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 904c384aa361..092b0bb1bb78 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) } else goto drop; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - ibp->opstats[opcode & 0x7f].n_bytes += tlen; - ibp->opstats[opcode & 0x7f].n_packets++; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; +#ifdef CONFIG_DEBUG_FS + rcd->opstats->stats[opcode].n_bytes += tlen; + rcd->opstats->stats[opcode].n_packets++; +#endif /* Get the destination QP number. */ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 86c2cb3c9caf..4a22a85b9f03 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -660,6 +660,10 @@ struct qib_opcode_stats { u64 n_bytes; /* total number of bytes */ }; +struct qib_opcode_stats_perctx { + struct qib_opcode_stats stats[128]; +}; + struct qib_ibport { struct qib_qp __rcu *qp0; struct qib_qp __rcu *qp1; @@ -726,7 +730,6 @@ struct qib_ibport { u8 vl_high_limit; u8 sl_to_vl[16]; - struct qib_opcode_stats opstats[128]; }; @@ -770,6 +773,10 @@ struct qib_ibdev { spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; +#ifdef CONFIG_DEBUG_FS + /* per HCA debugfs */ + struct dentry *qib_ibdev_dbg; +#endif }; struct qib_verbs_counters { -- cgit v1.2.3 From 17db3a92c144a0f8a81c52e94b7110bc86b5067f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 15 Jun 2013 17:07:09 -0400 Subject: IB/qib: Add per-context stats interface This patch adds a debugfs stats interface for per kernel contexts packet counts. The code uses the opcode stats count and eliminates the counter in the context. Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_debugfs.c | 63 +++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 47d01164cc91..a4e6ee154f19 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -126,6 +126,68 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(opcode_stats) +static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (!*pos) + return SEQ_START_TOKEN; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) + return pos; + + ++*pos; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void _ctx_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _ctx_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos; + loff_t i, j; + u64 n_packets = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) { + seq_puts(s, "Ctx:npkts\n"); + return 0; + } + + spos = v; + i = *spos; + + if (!dd->rcd[i]) + return SEQ_SKIP; + + for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) + n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + + if (!n_packets) + return SEQ_SKIP; + + seq_printf(s, " %llu:%llu\n", i, n_packets); + return 0; +} + +DEBUGFS_FILE(ctx_stats) + void qib_dbg_ibdev_init(struct qib_ibdev *ibd) { char name[10]; @@ -137,6 +199,7 @@ void qib_dbg_ibdev_init(struct qib_ibdev *ibd) return; } DEBUGFS_FILE_CREATE(opcode_stats); + DEBUGFS_FILE_CREATE(ctx_stats); return; } -- cgit v1.2.3 From 1dd173b01f52c7a197cab20563d5f4967472a852 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 15 Jun 2013 17:07:14 -0400 Subject: IB/qib: Add qp_stats debug file This adds a seq_file iterator for reporting the QP hash table when the qp_stats file is read. Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_debugfs.c | 54 +++++++++++++++++++ drivers/infiniband/hw/qib/qib_qp.c | 94 +++++++++++++++++++++++++++++++++ drivers/infiniband/hw/qib/qib_verbs.h | 14 ++++- 3 files changed, 161 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index a4e6ee154f19..799a0c3bffc4 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -188,6 +188,59 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(ctx_stats) +static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_qp_iter *iter; + loff_t n = *pos; + + iter = qib_qp_iter_init(s->private); + if (!iter) + return NULL; + + while (n--) { + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, + loff_t *pos) +{ + struct qib_qp_iter *iter = iter_ptr; + + (*pos)++; + + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) +{ + /* nothing for now */ +} + +static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) +{ + struct qib_qp_iter *iter = iter_ptr; + + if (!iter) + return 0; + + qib_qp_iter_print(s, iter); + + return 0; +} + +DEBUGFS_FILE(qp_stats) + void qib_dbg_ibdev_init(struct qib_ibdev *ibd) { char name[10]; @@ -200,6 +253,7 @@ void qib_dbg_ibdev_init(struct qib_ibdev *ibd) } DEBUGFS_FILE_CREATE(opcode_stats); DEBUGFS_FILE_CREATE(ctx_stats); + DEBUGFS_FILE_CREATE(qp_stats); return; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index c1f573a331c7..3cca55b51e54 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -35,6 +35,9 @@ #include #include #include +#ifdef CONFIG_DEBUG_FS +#include +#endif #include "qib.h" @@ -1287,3 +1290,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth) } } } + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter { + struct qib_ibdev *dev; + struct qib_qp *qp; + int n; +}; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) +{ + struct qib_qp_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int qib_qp_iter_next(struct qib_qp_iter *iter) +{ + struct qib_ibdev *dev = iter->dev; + int n = iter->n; + int ret = 1; + struct qib_qp *pqp = iter->qp; + struct qib_qp *qp; + + rcu_read_lock(); + for (; n < dev->qp_table_size; n++) { + if (pqp) + qp = rcu_dereference(pqp->next); + else + qp = rcu_dereference(dev->qp_table[n]); + pqp = qp; + if (qp) { + if (iter->qp) + atomic_dec(&iter->qp->refcount); + atomic_inc(&qp->refcount); + rcu_read_unlock(); + iter->qp = qp; + iter->n = n; + return 0; + } + } + rcu_read_unlock(); + if (iter->qp) + atomic_dec(&iter->qp->refcount); + return ret; +} + +static const char * const qp_type_str[] = { + "SMI", "GSI", "RC", "UC", "UD", +}; + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +{ + struct qib_swqe *wqe; + struct qib_qp *qp = iter->qp; + + wqe = get_swqe_ptr(qp, qp->s_last); + seq_printf(s, + "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", + iter->n, + qp->ibqp.qp_num, + qp_type_str[qp->ibqp.qp_type], + qp->state, + wqe->wr.opcode, + qp->s_hdrwords, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + !list_empty(&qp->iowait), + qp->timeout, + wqe->ssn, + qp->s_lsn, + qp->s_last_psn, + qp->s_psn, qp->s_next_psn, + qp->s_sending_psn, qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head, qp->s_size, + qp->remote_qpn, + qp->remote_ah_attr.dlid); +} + +#endif diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 4a22a85b9f03..012e2c7575ad 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -917,6 +917,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); void qib_free_qpn_table(struct qib_qpn_table *qpt); +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); + +int qib_qp_iter_next(struct qib_qp_iter *iter); + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); + +#endif + void qib_get_credit(struct qib_qp *qp, u32 aeth); unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); -- cgit v1.2.3 From c94e15c5cb4d02579321382871eb87e17d10858e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 23 Jun 2013 09:07:19 +0800 Subject: RDMA/ocrdma: Fix error return code in ocrdma_set_create_qp_rq_cmd() Fix to return -ENOMEM in the alloc dma coherent error case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 76c9e192ddcc..0965278dd2ed 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1886,7 +1886,7 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd, qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); if (!qp->rq.va) - return status; + return -ENOMEM; memset(qp->rq.va, 0, len); qp->rq.pa = pa; qp->rq.len = len; -- cgit v1.2.3 From 80b15043e3450e730d30b71c099ab00d75a551ce Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 21 Jun 2013 11:24:27 +0800 Subject: IB/core: Fix error return code in add_port() Fix to return -ENOMEM in the add_port() error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 246fdc151652..d9b78c4d0aad 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -545,8 +545,10 @@ static int add_port(struct ib_device *device, int port_num, p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); - if (!p->gid_group.attrs) + if (!p->gid_group.attrs) { + ret = -ENOMEM; goto err_remove_pma; + } ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) @@ -555,8 +557,10 @@ static int add_port(struct ib_device *device, int port_num, p->pkey_group.name = "pkeys"; p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, attr.pkey_tbl_len); - if (!p->pkey_group.attrs) + if (!p->pkey_group.attrs) { + ret = -ENOMEM; goto err_remove_gid; + } ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) -- cgit v1.2.3 From 22baa407f9e67636a3fd740a552f8f5e06cf8db5 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 26 Jun 2013 10:46:22 -0400 Subject: IB/qib: New transmitter tunning settings for Dell 1.1 backplane The Dell blade chassis got an updated backplane which requires new transmitter tuning settings. Signed-off-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba7322.c | 59 +++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 46ffea033be0..f7c4b44b1f93 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -590,7 +590,7 @@ struct vendor_txdds_ent { static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ -#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ +#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */ #define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ @@ -7466,15 +7466,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { @@ -7483,15 +7488,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { @@ -7500,15 +7510,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 1, 12, 10 }, /* QME7342 backplane setting */ - { 0, 1, 12, 11 }, /* QME7342 backplane setting */ - { 0, 1, 12, 12 }, /* QME7342 backplane setting */ - { 0, 1, 12, 14 }, /* QME7342 backplane setting */ - { 0, 1, 12, 6 }, /* QME7342 backplane setting */ - { 0, 1, 12, 7 }, /* QME7342 backplane setting */ - { 0, 1, 12, 8 }, /* QME7342 backplane setting */ { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { -- cgit v1.2.3 From 1fe0cb848890a3f4dd965fdce8b4108feb03971f Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 12 Jun 2013 15:20:36 +0200 Subject: IB/srp: Fix remove_one crash due to resource exhaustion If the add_one callback fails during driver load no resources are allocated so there isn't a need to release any resources. Trying to clean the resource may lead to the following kernel panic: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] srp_remove_one+0x31/0x240 [ib_srp] RIP: 0010:[] [] srp_remove_one+0x31/0x240 [ib_srp] Process rmmod (pid: 4562, threadinfo ffff8800dd738000, task ffff8801167e60c0) Call Trace: [] ib_unregister_client+0x4e/0x120 [ib_core] [] srp_cleanup_module+0x15/0x71 [ib_srp] [] sys_delete_module+0x194/0x260 [] system_call_fastpath+0x16/0x1b Signed-off-by: Dotan Barak Reviewed-by: Eli Cohen Signed-off-by: Bart Van Assche Acked-by: Sebastian Riemer Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 7ccf3284dda3..368d1606e16f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2507,6 +2507,8 @@ static void srp_remove_one(struct ib_device *device) struct srp_target_port *target; srp_dev = ib_get_client_data(device, &srp_client); + if (!srp_dev) + return; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { device_unregister(&host->dev); -- cgit v1.2.3 From 086f44f58855ae18bab19fb794cce6c6d2c6143b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Jun 2013 15:23:04 +0200 Subject: IB/srp: Avoid skipping srp_reset_host() after a transport error The SCSI error handler assumes that the transport layer is operational if an eh_abort_handler() returns SUCCESS. Hence srp_abort() only should return SUCCESS if sending the ABORT TASK task management function succeeded. This patch avoids the SCSI error handler skipping the srp_reset_host() call after a transport layer error. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 368d1606e16f..759c55b9685f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1744,18 +1744,23 @@ static int srp_abort(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_request *req = (struct srp_request *) scmnd->host_scribble; + int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); if (!req || !srp_claim_req(target, req, scmnd)) return FAILED; - srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, - SRP_TSK_ABORT_TASK); + if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, + SRP_TSK_ABORT_TASK) == 0 || + target->transport_offline) + ret = SUCCESS; + else + ret = FAILED; srp_free_req(target, req, scmnd, 0); scmnd->result = DID_ABORT << 16; scmnd->scsi_done(scmnd); - return SUCCESS; + return ret; } static int srp_reset_device(struct scsi_cmnd *scmnd) -- cgit v1.2.3 From 2742c1dadde602baea6f0547c028154aebd6f1ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Jun 2013 15:24:25 +0200 Subject: IB/srp: Skip host settle delay The SRP initiator implements host reset by reconnecting to the SRP target. That means that communication with the target is possible as soon as host reset finished. Hence skip the host settle delay. Signed-off-by: Bart Van Assche Reviewed-by: Sebastian Riemer Reviewed-by: Christoph Hellwig Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 759c55b9685f..bc13c8db7fc2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1951,6 +1951,7 @@ static struct scsi_host_template srp_template = { .eh_abort_handler = srp_abort, .eh_device_reset_handler = srp_reset_device, .eh_host_reset_handler = srp_reset_host, + .skip_settle_delay = true, .sg_tablesize = SRP_DEF_SG_TABLESIZE, .can_queue = SRP_CMD_SQ_SIZE, .this_id = -1, -- cgit v1.2.3 From 99e1c1398f44a056b16e78122133988c82b66d97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Jun 2013 14:49:58 +0200 Subject: IB/srp: Fail I/O fast if target offline If reconnecting failed we know that no command completion will be received anymore. Hence let the SCSI error handler fail such commands immediately. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc13c8db7fc2..1f331011653e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1754,6 +1754,8 @@ static int srp_abort(struct scsi_cmnd *scmnd) SRP_TSK_ABORT_TASK) == 0 || target->transport_offline) ret = SUCCESS; + else if (target->transport_offline) + ret = FAST_IO_FAIL; else ret = FAILED; srp_free_req(target, req, scmnd, 0); -- cgit v1.2.3 From 96fc248a4c6fe1e5ffac4903b39d2dd5cbe2dc9a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Jun 2013 14:51:26 +0200 Subject: IB/srp: Maintain a single connection per I_T nexus An SRP target is required to maintain a single connection between initiator and target. This means that if the 'add_target' attribute is used to create a second connection to a target, the first connection will be logged out and that the SCSI error handler will kick in. The SCSI error handler will cause the SRP initiator to reconnect, which will cause I/O over the second connection to fail. Avoid such ping-pong behavior by disabling relogins. If reconnecting manually is necessary, that is possible by deleting and recreating an rport via sysfs. Signed-off-by: Bart Van Assche Signed-off-by: Sebastian Riemer Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 44 +++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1f331011653e..830ef0037087 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -542,11 +542,11 @@ static void srp_remove_work(struct work_struct *work) WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + srp_remove_target(target); + spin_lock(&target->srp_host->target_lock); list_del(&target->list); spin_unlock(&target->srp_host->target_lock); - - srp_remove_target(target); } static void srp_rport_delete(struct srp_rport *rport) @@ -2009,6 +2009,36 @@ static struct class srp_class = { .dev_release = srp_release_dev }; +/** + * srp_conn_unique() - check whether the connection to a target is unique + */ +static bool srp_conn_unique(struct srp_host *host, + struct srp_target_port *target) +{ + struct srp_target_port *t; + bool ret = false; + + if (target->state == SRP_TARGET_REMOVED) + goto out; + + ret = true; + + spin_lock(&host->target_lock); + list_for_each_entry(t, &host->target_list, list) { + if (t != target && + target->id_ext == t->id_ext && + target->ioc_guid == t->ioc_guid && + target->initiator_ext == t->initiator_ext) { + ret = false; + break; + } + } + spin_unlock(&host->target_lock); + +out: + return ret; +} + /* * Target ports are added by writing * @@ -2265,6 +2295,16 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; + if (!srp_conn_unique(target->srp_host, target)) { + shost_printk(KERN_INFO, target->scsi_host, + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be64_to_cpu(target->initiator_ext)); + ret = -EEXIST; + goto err; + } + if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && target->cmd_sg_cnt < target->sg_tablesize) { pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); -- cgit v1.2.3 From 4b5e5f41c8e114bb856347134657eb9e1cccc822 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Jun 2013 14:57:42 +0200 Subject: IB/srp: Make HCA completion vector configurable Several InfiniBand HCAs allow configuring the completion vector per CQ. This allows spreading the workload created by IB completion interrupts over multiple MSI-X vectors and hence over multiple CPU cores. In other words, configuring the completion vector properly not only allows reducing latency on an initiator connected to multiple SRP targets but also allows improving throughput. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- Documentation/ABI/stable/sysfs-driver-ib_srp | 7 +++++++ drivers/infiniband/ulp/srp/ib_srp.c | 26 ++++++++++++++++++++++++-- drivers/infiniband/ulp/srp/ib_srp.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp index 481aae95c7d1..5c53d28f775c 100644 --- a/Documentation/ABI/stable/sysfs-driver-ib_srp +++ b/Documentation/ABI/stable/sysfs-driver-ib_srp @@ -54,6 +54,13 @@ Description: Interface for making ib_srp connect to a new target. ib_srp. Specifying a value that exceeds cmd_sg_entries is only safe with partial memory descriptor list support enabled (allow_ext_sg=1). + * comp_vector, a number in the range 0..n-1 specifying the + MSI-X completion vector. Some HCA's allocate multiple (n) + MSI-X vectors per HCA port. If the IRQ affinity masks of + these interrupts have been configured such that each MSI-X + interrupt is handled by a different CPU then the comp_vector + parameter can be used to spread the SRP completion workload + over multiple CPU's. What: /sys/class/infiniband_srp/srp--/ibdev Date: January 2, 2006 diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 830ef0037087..9b30366cb017 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -231,14 +231,16 @@ static int srp_create_target_ib(struct srp_target_port *target) return -ENOMEM; recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); + srp_recv_completion, NULL, target, SRP_RQ_SIZE, + target->comp_vector); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } send_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); + srp_send_completion, NULL, target, SRP_SQ_SIZE, + target->comp_vector); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -1898,6 +1900,14 @@ static ssize_t show_local_ib_device(struct device *dev, return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); } +static ssize_t show_comp_vector(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->comp_vector); +} + static ssize_t show_cmd_sg_entries(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1924,6 +1934,7 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); +static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); @@ -1938,6 +1949,7 @@ static struct device_attribute *srp_host_attrs[] = { &dev_attr_zero_req_lim, &dev_attr_local_ib_port, &dev_attr_local_ib_device, + &dev_attr_comp_vector, &dev_attr_cmd_sg_entries, &dev_attr_allow_ext_sg, NULL @@ -2061,6 +2073,7 @@ enum { SRP_OPT_CMD_SG_ENTRIES = 1 << 9, SRP_OPT_ALLOW_EXT_SG = 1 << 10, SRP_OPT_SG_TABLESIZE = 1 << 11, + SRP_OPT_COMP_VECTOR = 1 << 12, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -2081,6 +2094,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, + { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, { SRP_OPT_ERR, NULL } }; @@ -2236,6 +2250,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) target->sg_tablesize = token; break; + case SRP_OPT_COMP_VECTOR: + if (match_int(args, &token) || token < 0) { + pr_warn("bad comp_vector parameter '%s'\n", p); + goto out; + } + target->comp_vector = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 66fbedda4571..e641088c14dc 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -156,6 +156,7 @@ struct srp_target_port { char target_name[32]; unsigned int scsi_id; unsigned int sg_tablesize; + int comp_vector; struct ib_sa_path_rec path; __be16 orig_dgid[8]; -- cgit v1.2.3 From e8ca413558de8ea235a1223074c4ed09616b9034 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Fri, 28 Jun 2013 14:59:08 +0200 Subject: IB/srp: Bump driver version and release date Signed-off-by: Vu Pham Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9b30366cb017..9d8b46eafdb2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -53,8 +53,8 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.2" -#define DRV_RELDATE "November 1, 2005" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "July 1, 2013" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " -- cgit v1.2.3 From 0134f16bc91cc15a38c867b81568b791c9b626aa Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 7 Jul 2013 17:25:52 +0300 Subject: IB/core: Add reserved values to enums for low-level driver use Continue the approach taken by commit d2b57063e4a ("IB/core: Reserve bits in enum ib_qp_create_flags for low-level driver use") and add reserved entries to the ib_qp_type and ib_wr_opcode enums. Low-level drivers can then define macros to use these reserved values, giving proper names to the macros for readability. Also add a range of reserved flags to enum ib_send_flags. The mlx5 IB driver uses the new additions. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 98cc4b29fc5b..645c3cedce9c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -610,7 +610,21 @@ enum ib_qp_type { IB_QPT_RAW_PACKET = 8, IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, - IB_QPT_MAX + IB_QPT_MAX, + /* Reserve a range for qp types internal to the low level driver. + * These qp types will not be visible at the IB core layer, so the + * IB_QPT_MAX usages should not be affected in the core layer + */ + IB_QPT_RESERVED1 = 0x1000, + IB_QPT_RESERVED2, + IB_QPT_RESERVED3, + IB_QPT_RESERVED4, + IB_QPT_RESERVED5, + IB_QPT_RESERVED6, + IB_QPT_RESERVED7, + IB_QPT_RESERVED8, + IB_QPT_RESERVED9, + IB_QPT_RESERVED10, }; enum ib_qp_create_flags { @@ -766,6 +780,19 @@ enum ib_wr_opcode { IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_BIND_MW, + /* reserve values for low level drivers' internal use. + * These values will not be used at all in the ib core layer. + */ + IB_WR_RESERVED1 = 0xf0, + IB_WR_RESERVED2, + IB_WR_RESERVED3, + IB_WR_RESERVED4, + IB_WR_RESERVED5, + IB_WR_RESERVED6, + IB_WR_RESERVED7, + IB_WR_RESERVED8, + IB_WR_RESERVED9, + IB_WR_RESERVED10, }; enum ib_send_flags { @@ -773,7 +800,11 @@ enum ib_send_flags { IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), IB_SEND_INLINE = (1<<3), - IB_SEND_IP_CSUM = (1<<4) + IB_SEND_IP_CSUM = (1<<4), + + /* reserve bits 26-31 for low level drivers' internal use */ + IB_SEND_RESERVED_START = (1 << 26), + IB_SEND_RESERVED_END = (1 << 31), }; struct ib_sge { -- cgit v1.2.3 From e126ba97dba9edeb6fafa3665b5f8497fc9cdf8c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 7 Jul 2013 17:25:49 +0300 Subject: mlx5: Add driver for Mellanox Connect-IB adapters The driver is comprised of two kernel modules: mlx5_ib and mlx5_core. This partitioning resembles what we have for mlx4, except that mlx5_ib is the pci device driver and not mlx5_core. mlx5_core is essentially a library that provides general functionality that is intended to be used by other Mellanox devices that will be introduced in the future. mlx5_ib has a similar role as any hardware device under drivers/infiniband/hw. Signed-off-by: Eli Cohen Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz [ Merge in coccinelle fixes from Fengguang Wu . - Roland ] Signed-off-by: Roland Dreier --- MAINTAINERS | 22 + drivers/infiniband/Kconfig | 1 + drivers/infiniband/Makefile | 1 + drivers/infiniband/hw/mlx5/Kconfig | 10 + drivers/infiniband/hw/mlx5/Makefile | 3 + drivers/infiniband/hw/mlx5/ah.c | 92 + drivers/infiniband/hw/mlx5/cq.c | 843 +++++++ drivers/infiniband/hw/mlx5/doorbell.c | 100 + drivers/infiniband/hw/mlx5/mad.c | 139 ++ drivers/infiniband/hw/mlx5/main.c | 1504 ++++++++++++ drivers/infiniband/hw/mlx5/mem.c | 162 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 545 +++++ drivers/infiniband/hw/mlx5/mr.c | 1007 ++++++++ drivers/infiniband/hw/mlx5/qp.c | 2524 ++++++++++++++++++++ drivers/infiniband/hw/mlx5/srq.c | 473 ++++ drivers/infiniband/hw/mlx5/user.h | 121 + drivers/net/ethernet/mellanox/Kconfig | 1 + drivers/net/ethernet/mellanox/Makefile | 1 + drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 18 + drivers/net/ethernet/mellanox/mlx5/core/Makefile | 5 + drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 238 ++ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1515 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/cq.c | 224 ++ drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 587 +++++ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 521 ++++ drivers/net/ethernet/mellanox/mlx5/core/fw.c | 185 ++ drivers/net/ethernet/mellanox/mlx5/core/health.c | 217 ++ drivers/net/ethernet/mellanox/mlx5/core/mad.c | 78 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 475 ++++ drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 106 + .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 73 + drivers/net/ethernet/mellanox/mlx5/core/mr.c | 136 ++ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 435 ++++ drivers/net/ethernet/mellanox/mlx5/core/pd.c | 101 + drivers/net/ethernet/mellanox/mlx5/core/port.c | 104 + drivers/net/ethernet/mellanox/mlx5/core/qp.c | 301 +++ drivers/net/ethernet/mellanox/mlx5/core/srq.c | 223 ++ drivers/net/ethernet/mellanox/mlx5/core/uar.c | 223 ++ include/linux/mlx5/cmd.h | 51 + include/linux/mlx5/cq.h | 165 ++ include/linux/mlx5/device.h | 893 +++++++ include/linux/mlx5/doorbell.h | 79 + include/linux/mlx5/driver.h | 769 ++++++ include/linux/mlx5/qp.h | 467 ++++ include/linux/mlx5/srq.h | 41 + 45 files changed, 15779 insertions(+) create mode 100644 drivers/infiniband/hw/mlx5/Kconfig create mode 100644 drivers/infiniband/hw/mlx5/Makefile create mode 100644 drivers/infiniband/hw/mlx5/ah.c create mode 100644 drivers/infiniband/hw/mlx5/cq.c create mode 100644 drivers/infiniband/hw/mlx5/doorbell.c create mode 100644 drivers/infiniband/hw/mlx5/mad.c create mode 100644 drivers/infiniband/hw/mlx5/main.c create mode 100644 drivers/infiniband/hw/mlx5/mem.c create mode 100644 drivers/infiniband/hw/mlx5/mlx5_ib.h create mode 100644 drivers/infiniband/hw/mlx5/mr.c create mode 100644 drivers/infiniband/hw/mlx5/qp.c create mode 100644 drivers/infiniband/hw/mlx5/srq.c create mode 100644 drivers/infiniband/hw/mlx5/user.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/Kconfig create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/Makefile create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/alloc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/cq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fw.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/health.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mad.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/main.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mcg.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mr.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/port.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/qp.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/srq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/uar.c create mode 100644 include/linux/mlx5/cmd.h create mode 100644 include/linux/mlx5/cq.h create mode 100644 include/linux/mlx5/device.h create mode 100644 include/linux/mlx5/doorbell.h create mode 100644 include/linux/mlx5/driver.h create mode 100644 include/linux/mlx5/qp.h create mode 100644 include/linux/mlx5/srq.h diff --git a/MAINTAINERS b/MAINTAINERS index 60d6a3393500..b4265364ed27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5365,6 +5365,28 @@ W: http://linuxtv.org S: Odd Fixes F: drivers/media/radio/radio-miropcm20* +Mellanox MLX5 core VPI driver +M: Eli Cohen +L: netdev@vger.kernel.org +L: linux-rdma@vger.kernel.org +W: http://www.mellanox.com +Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +T: git://openfabrics.org/~eli/connect-ib.git +S: Supported +F: drivers/net/ethernet/mellanox/mlx5/core/ +F: include/linux/mlx5/ + +Mellanox MLX5 IB driver +M: Eli Cohen +L: linux-rdma@vger.kernel.org +W: http://www.mellanox.com +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +T: git://openfabrics.org/~eli/connect-ib.git +S: Supported +F: include/linux/mlx5/ +F: drivers/infiniband/hw/mlx5/ + MODULE SUPPORT M: Rusty Russell S: Maintained diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index c85b56c28099..5ceda710f516 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" +source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index b126fefe0b1c..1fe69888515f 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ +obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/ obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig new file mode 100644 index 000000000000..8e6aebfaf8a4 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -0,0 +1,10 @@ +config MLX5_INFINIBAND + tristate "Mellanox Connect-IB HCA support" + depends on NETDEVICES && ETHERNET && PCI && X86 + select NET_VENDOR_MELLANOX + select MLX5_CORE + ---help--- + This driver provides low-level InfiniBand support for + Mellanox Connect-IB PCI Express host channel adapters (HCAs). + This is required to use InfiniBand protocols such as + IP-over-IB or SRP with these devices. diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile new file mode 100644 index 000000000000..4ea0135af484 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o + +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c new file mode 100644 index 000000000000..39ab0caefdf9 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx5_ib.h" + +struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, + struct mlx5_ib_ah *ah) +{ + if (ah_attr->ah_flags & IB_AH_GRH) { + memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16); + ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label | + (1 << 30) | + ah_attr->grh.sgid_index << 20); + ah->av.hop_limit = ah_attr->grh.hop_limit; + ah->av.tclass = ah_attr->grh.traffic_class; + } + + ah->av.rlid = cpu_to_be16(ah_attr->dlid); + ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; + ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf); + + return &ah->ibah; +} + +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx5_ib_ah *ah; + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + return create_ib_ah(ah_attr, ah); /* never fails */ +} + +int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct mlx5_ib_ah *ah = to_mah(ibah); + u32 tmp; + + memset(ah_attr, 0, sizeof(*ah_attr)); + + tmp = be32_to_cpu(ah->av.grh_gid_fl); + if (tmp & (1 << 30)) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.sgid_index = (tmp >> 20) & 0xff; + ah_attr->grh.flow_label = tmp & 0xfffff; + memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16); + ah_attr->grh.hop_limit = ah->av.hop_limit; + ah_attr->grh.traffic_class = ah->av.tclass; + } + ah_attr->dlid = be16_to_cpu(ah->av.rlid); + ah_attr->static_rate = ah->av.stat_rate_sl >> 4; + ah_attr->sl = ah->av.stat_rate_sl & 0xf; + + return 0; +} + +int mlx5_ib_destroy_ah(struct ib_ah *ah) +{ + kfree(to_mah(ah)); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c new file mode 100644 index 000000000000..344ab03948a3 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -0,0 +1,843 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_ib.h" +#include "user.h" + +static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) +{ + struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; + + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) +{ + struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq); + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct ib_cq *ibcq = &cq->ibcq; + struct ib_event event; + + if (type != MLX5_EVENT_TYPE_CQ_ERROR) { + mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n", + type, mcq->cqn); + return; + } + + if (ibcq->event_handler) { + event.device = &dev->ib_dev; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size) +{ + return mlx5_buf_offset(&buf->buf, n * size); +} + +static void *get_cqe(struct mlx5_ib_cq *cq, int n) +{ + return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); +} + +static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) +{ + void *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx5_cqe64 *cqe64; + + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ + !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; +} + +static void *next_cqe_sw(struct mlx5_ib_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) +{ + switch (wq->wr_data[idx]) { + case MLX5_IB_WR_UMR: + return 0; + + case IB_WR_LOCAL_INV: + return IB_WC_LOCAL_INV; + + case IB_WR_FAST_REG_MR: + return IB_WC_FAST_REG_MR; + + default: + pr_warn("unknown completion status\n"); + return 0; + } +} + +static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, + struct mlx5_ib_wq *wq, int idx) +{ + wc->wc_flags = 0; + switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { + case MLX5_OPCODE_RDMA_WRITE_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX5_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case MLX5_OPCODE_SEND_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX5_OPCODE_SEND: + case MLX5_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_SEND; + break; + case MLX5_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + break; + case MLX5_OPCODE_ATOMIC_CS: + wc->opcode = IB_WC_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_FA: + wc->opcode = IB_WC_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_MASKED_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_MASKED_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX5_OPCODE_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + case MLX5_OPCODE_UMR: + wc->opcode = get_umr_comp(wq, idx); + break; + } +} + +enum { + MLX5_GRH_IN_BUFFER = 1, + MLX5_GRH_IN_CQE = 2, +}; + +static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, + struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); + struct mlx5_ib_srq *srq; + struct mlx5_ib_wq *wq; + u16 wqe_ctr; + u8 g; + + if (qp->ibqp.srq || qp->ibqp.xrcd) { + struct mlx5_core_srq *msrq = NULL; + + if (qp->ibqp.xrcd) { + msrq = mlx5_core_get_srq(&dev->mdev, + be32_to_cpu(cqe->srqn)); + srq = to_mibsrq(msrq); + } else { + srq = to_msrq(qp->ibqp.srq); + } + if (srq) { + wqe_ctr = be16_to_cpu(cqe->wqe_counter); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx5_ib_free_srq_wqe(srq, wqe_ctr); + if (msrq && atomic_dec_and_test(&msrq->refcount)) + complete(&msrq->free); + } + } else { + wq = &qp->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + + switch (cqe->op_own >> 4) { + case MLX5_CQE_RESP_WR_IMM: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->imm_inval_pkey; + break; + case MLX5_CQE_RESP_SEND: + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + break; + case MLX5_CQE_RESP_SEND_IMM: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->imm_inval_pkey; + break; + case MLX5_CQE_RESP_SEND_INV: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); + break; + } + wc->slid = be16_to_cpu(cqe->slid); + wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; + wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; + wc->dlid_path_bits = cqe->ml_path; + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + wc->wc_flags |= g ? IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; +} + +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +{ + __be32 *p = (__be32 *)cqe; + int i; + + mlx5_ib_warn(dev, "dump error cqe\n"); + for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) + pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), + be32_to_cpu(p[1]), be32_to_cpu(p[2]), + be32_to_cpu(p[3])); +} + +static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, + struct mlx5_err_cqe *cqe, + struct ib_wc *wc) +{ + int dump = 1; + + switch (cqe->syndrome) { + case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: + dump = 0; + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case MLX5_CQE_SYNDROME_MW_BIND_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case MLX5_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + dump = 0; + break; + case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + dump = 0; + break; + case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: + wc->status = IB_WC_REM_ABORT_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + wc->vendor_err = cqe->vendor_err_synd; + if (dump) + dump_cqe(dev, cqe); +} + +static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) +{ + /* TBD: waiting decision + */ + return 0; +} + +static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) +{ + struct mlx5_wqe_data_seg *dpseg; + void *addr; + + dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_atomic_seg); + addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); + return addr; +} + +static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + uint16_t idx) +{ + void *addr; + int byte_count; + int i; + + if (!is_atomic_response(qp, idx)) + return; + + byte_count = be32_to_cpu(cqe64->byte_cnt); + addr = mlx5_get_atomic_laddr(qp, idx); + + if (byte_count == 4) { + *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); + } else { + for (i = 0; i < byte_count; i += 8) { + *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); + addr += 8; + } + } + + return; +} + +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) +{ + int idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + handle_atomic(qp, cqe64, idx); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; +} + +static int mlx5_poll_one(struct mlx5_ib_cq *cq, + struct mlx5_ib_qp **cur_qp, + struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_err_cqe *err_cqe; + struct mlx5_cqe64 *cqe64; + struct mlx5_core_qp *mqp; + struct mlx5_ib_wq *wq; + uint8_t opcode; + uint32_t qpn; + u16 wqe_ctr; + void *cqe; + int idx; + + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + + ++cq->mcq.cons_index; + + /* Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + /* TBD: resize CQ */ + + qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; + if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + /* We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + mqp = __mlx5_qp_lookup(&dev->mdev, qpn); + if (unlikely(!mqp)) { + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", + cq->mcq.cqn, qpn); + return -EINVAL; + } + + *cur_qp = to_mibqp(mqp); + } + + wc->qp = &(*cur_qp)->ibqp; + opcode = cqe64->op_own >> 4; + switch (opcode) { + case MLX5_CQE_REQ: + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + idx = wqe_ctr & (wq->wqe_cnt - 1); + handle_good_req(wc, cqe64, wq, idx); + handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + wc->status = IB_WC_SUCCESS; + break; + case MLX5_CQE_RESP_WR_IMM: + case MLX5_CQE_RESP_SEND: + case MLX5_CQE_RESP_SEND_IMM: + case MLX5_CQE_RESP_SEND_INV: + handle_responder(wc, cqe64, *cur_qp); + wc->status = IB_WC_SUCCESS; + break; + case MLX5_CQE_RESIZE_CQ: + break; + case MLX5_CQE_REQ_ERR: + case MLX5_CQE_RESP_ERR: + err_cqe = (struct mlx5_err_cqe *)cqe64; + mlx5_handle_error_cqe(dev, err_cqe, wc); + mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n", + opcode == MLX5_CQE_REQ_ERR ? + "Requestor" : "Responder", cq->mcq.cqn); + mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", + err_cqe->syndrome, err_cqe->vendor_err_synd); + if (opcode == MLX5_CQE_REQ_ERR) { + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + idx = wqe_ctr & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + } else { + struct mlx5_ib_srq *srq; + + if ((*cur_qp)->ibqp.srq) { + srq = to_msrq((*cur_qp)->ibqp.srq); + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx5_ib_free_srq_wqe(srq, wqe_ctr); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + } + break; + } + + return 0; +} + +int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct mlx5_ib_cq *cq = to_mcq(ibcq); + struct mlx5_ib_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + int err = 0; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; npolled++) { + err = mlx5_poll_one(cq, &cur_qp, wc + npolled); + if (err) + break; + } + + if (npolled) + mlx5_cq_set_ci(&cq->mcq); + + spin_unlock_irqrestore(&cq->lock, flags); + + if (err == 0 || err == -EAGAIN) + return npolled; + else + return err; +} + +int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + mlx5_cq_arm(&to_mcq(ibcq)->mcq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, + to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map, + MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock)); + + return 0; +} + +static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, + int nent, int cqe_size) +{ + int err; + + err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size, + PAGE_SIZE * 2, &buf->buf); + if (err) + return err; + + buf->cqe_size = cqe_size; + + return 0; +} + +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +{ + mlx5_buf_free(&dev->mdev, &buf->buf); +} + +static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, + struct ib_ucontext *context, struct mlx5_ib_cq *cq, + int entries, struct mlx5_create_cq_mbox_in **cqb, + int *cqe_size, int *index, int *inlen) +{ + struct mlx5_ib_create_cq ucmd; + int page_shift; + int npages; + int ncont; + int err; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; + + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) + return -EINVAL; + + *cqe_size = ucmd.cqe_size; + + cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, + entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(cq->buf.umem)) { + err = PTR_ERR(cq->buf.umem); + return err; + } + + err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + &cq->db); + if (err) + goto err_umem; + + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + + *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont; + *cqb = mlx5_vzalloc(*inlen); + if (!*cqb) { + err = -ENOMEM; + goto err_db; + } + mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); + (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + + *index = to_mucontext(context)->uuari.uars[0].index; + + return 0; + +err_db: + mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + +err_umem: + ib_umem_release(cq->buf.umem); + return err; +} + +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +{ + mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + ib_umem_release(cq->buf.umem); +} + +static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) +{ + int i; + void *cqe; + struct mlx5_cqe64 *cqe64; + + for (i = 0; i < nent; i++) { + cqe = get_cqe(cq, i); + cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; + cqe64->op_own = 0xf1; + } +} + +static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size, + struct mlx5_create_cq_mbox_in **cqb, + int *index, int *inlen) +{ + int err; + + err = mlx5_db_alloc(&dev->mdev, &cq->db); + if (err) + return err; + + cq->mcq.set_ci_db = cq->db.db; + cq->mcq.arm_db = cq->db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + cq->mcq.cqe_sz = cqe_size; + + err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size); + if (err) + goto err_db; + + init_cq_buf(cq, entries); + + *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; + *cqb = mlx5_vzalloc(*inlen); + if (!*cqb) { + err = -ENOMEM; + goto err_buf; + } + mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); + + (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT; + *index = dev->mdev.priv.uuari.uars[0].index; + + return 0; + +err_buf: + free_cq_buf(dev, &cq->buf); + +err_db: + mlx5_db_free(&dev->mdev, &cq->db); + return err; +} + +static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ + free_cq_buf(dev, &cq->buf); + mlx5_db_free(&dev->mdev, &cq->db); +} + +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_create_cq_mbox_in *cqb = NULL; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_cq *cq; + int uninitialized_var(index); + int uninitialized_var(inlen); + int cqe_size; + int irqn; + int eqn; + int err; + + entries = roundup_pow_of_two(entries + 1); + if (entries < 1 || entries > dev->mdev.caps.max_cqes) + return ERR_PTR(-EINVAL); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + cq->ibcq.cqe = entries - 1; + mutex_init(&cq->resize_mutex); + spin_lock_init(&cq->lock); + cq->resize_buf = NULL; + cq->resize_umem = NULL; + + if (context) { + err = create_cq_user(dev, udata, context, cq, entries, + &cqb, &cqe_size, &index, &inlen); + if (err) + goto err_create; + } else { + /* for now choose 64 bytes till we have a proper interface */ + cqe_size = 64; + err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, + &index, &inlen); + if (err) + goto err_create; + } + + cq->cqe_size = cqe_size; + cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); + err = mlx5_vector2eqn(dev, vector, &eqn, &irqn); + if (err) + goto err_cqb; + + cqb->ctx.c_eqn = cpu_to_be16(eqn); + cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + + err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen); + if (err) + goto err_cqb; + + mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); + cq->mcq.irqn = irqn; + cq->mcq.comp = mlx5_ib_cq_comp; + cq->mcq.event = mlx5_ib_cq_event; + + if (context) + if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { + err = -EFAULT; + goto err_cmd; + } + + + mlx5_vfree(cqb); + return &cq->ibcq; + +err_cmd: + mlx5_core_destroy_cq(&dev->mdev, &cq->mcq); + +err_cqb: + mlx5_vfree(cqb); + if (context) + destroy_cq_user(cq, context); + else + destroy_cq_kernel(dev, cq); + +err_create: + kfree(cq); + + return ERR_PTR(err); +} + + +int mlx5_ib_destroy_cq(struct ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + struct ib_ucontext *context = NULL; + + if (cq->uobject) + context = cq->uobject->context; + + mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq); + if (context) + destroy_cq_user(mcq, context); + else + destroy_cq_kernel(dev, mcq); + + kfree(mcq); + + return 0; +} + +static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq, + u32 rsn) +{ + u32 lrsn; + + if (srq) + lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff; + else + lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff; + + return rsn == lrsn; +} + +void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) +{ + struct mlx5_cqe64 *cqe64, *dest64; + void *cqe, *dest; + u32 prod_index; + int nfreed = 0; + u8 owner_bit; + + if (!cq) + return; + + /* First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++) + if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) + break; + + /* Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + if (is_equal_rsn(cqe64, srq, rsn)) { + if (srq) + mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64; + owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK; + memcpy(dest, cqe, cq->mcq.cqe_sz); + dest64->op_own = owner_bit | + (dest64->op_own & ~MLX5_CQE_OWNER_MASK); + } + } + + if (nfreed) { + cq->mcq.cons_index += nfreed; + /* Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + mlx5_cq_set_ci(&cq->mcq); + } +} + +void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) +{ + if (!cq) + return; + + spin_lock_irq(&cq->lock); + __mlx5_ib_cq_clean(cq, qpn, srq); + spin_unlock_irq(&cq->lock); +} + +int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + return -ENOSYS; +} + +int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + return -ENOSYS; +} + +int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) +{ + struct mlx5_ib_cq *cq; + + if (!ibcq) + return 128; + + cq = to_mcq(ibcq); + return cq->cqe_size; +} diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c new file mode 100644 index 000000000000..256a23344f28 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "mlx5_ib.h" + +struct mlx5_ib_user_db_page { + struct list_head list; + struct ib_umem *umem; + unsigned long user_virt; + int refcnt; +}; + +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, + struct mlx5_db *db) +{ + struct mlx5_ib_user_db_page *page; + struct ib_umem_chunk *chunk; + int err = 0; + + mutex_lock(&context->db_page_mutex); + + list_for_each_entry(page, &context->db_page_list, list) + if (page->user_virt == (virt & PAGE_MASK)) + goto found; + + page = kmalloc(sizeof(*page), GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + page->user_virt = (virt & PAGE_MASK); + page->refcnt = 0; + page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, + PAGE_SIZE, 0, 0); + if (IS_ERR(page->umem)) { + err = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &context->db_page_list); + +found: + chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); + db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->u.user_page = page; + ++page->refcnt; + +out: + mutex_unlock(&context->db_page_mutex); + + return err; +} + +void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) +{ + mutex_lock(&context->db_page_mutex); + + if (!--db->u.user_page->refcnt) { + list_del(&db->u.user_page->list); + ib_umem_release(db->u.user_page->umem); + kfree(db->u.user_page); + } + + mutex_unlock(&context->db_page_mutex); +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c new file mode 100644 index 000000000000..5c8938be0e08 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_ib.h" + +enum { + MLX5_IB_VENDOR_CLASS1 = 0x9, + MLX5_IB_VENDOR_CLASS2 = 0xa +}; + +int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad) +{ + u8 op_modifier = 0; + + /* Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. + */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port); +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + u16 slid; + int err; + + slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else { + return IB_MAD_RESULT_SUCCESS; + } + + err = mlx5_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u16 packet_error; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + + packet_error = be16_to_cpu(out_mad->status); + + dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ? + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c new file mode 100644 index 000000000000..6b1007f9bc29 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/main.c @@ -0,0 +1,1504 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user.h" +#include "mlx5_ib.h" + +#define DRIVER_NAME "mlx5_ib" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "June 2013" + +MODULE_AUTHOR("Eli Cohen "); +MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static int prof_sel = 2; +module_param_named(prof_sel, prof_sel, int, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + +static char mlx5_version[] = + DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" + DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; + +struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = 17, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, +}; + +int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + int err = -ENOENT; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + if (eq->index == vector) { + *eqn = eq->eqn; + *irqn = eq->irqn; + err = 0; + break; + } + } + spin_unlock(&table->lock); + + return err; +} + +static int alloc_comp_eqs(struct mlx5_ib_dev *dev) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&dev->eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; + for (i = 0; i < ncomp_vec; i++) { + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + err = mlx5_create_map_eq(&dev->mdev, eq, + i + MLX5_EQ_VEC_COMP_BASE, nent, 0, + eq->name, + &dev->mdev.priv.uuari.uars[0]); + if (err) { + kfree(eq); + goto clean; + } + mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + eq->index = i; + spin_lock(&table->lock); + list_add_tail(&eq->list, &dev->eqs_list); + spin_unlock(&table->lock); + } + + dev->num_comp_vectors = ncomp_vec; + return 0; + +clean: + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); + return err; +} + +static void free_comp_eqs(struct mlx5_ib_dev *dev) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); +} + +static int mlx5_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + int max_rq_sg; + int max_sq_sg; + u64 flags; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memset(props, 0, sizeof(*props)); + + props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) | + (fw_rev_min(&dev->mdev) << 16) | + fw_rev_sub(&dev->mdev); + props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + flags = dev->mdev.caps.flags; + if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + if (flags & MLX5_DEV_CAP_FLAG_APM) + props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if (flags & MLX5_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + + props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30)); + props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + + props->max_mr_size = ~0ull; + props->page_size_cap = dev->mdev.caps.min_page_sz; + props->max_qp = 1 << dev->mdev.caps.log_max_qp; + props->max_qp_wr = dev->mdev.caps.max_wqes; + max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); + props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_cq = 1 << dev->mdev.caps.log_max_cq; + props->max_cqe = dev->mdev.caps.max_cqes - 1; + props->max_mr = 1 << dev->mdev.caps.log_max_mkey; + props->max_pd = 1 << dev->mdev.caps.log_max_pd; + props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp; + props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = 1 << dev->mdev.caps.log_max_srq; + props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1; + props->max_srq_sge = max_rq_sg - 1; + props->max_fast_reg_page_list_len = (unsigned int)-1; + props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay; + props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_HCA; + props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); + props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg; + props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int ext_active_speed; + int err = -ENOMEM; + + if (port < 1 || port > dev->mdev.caps.num_ports) { + mlx5_ib_warn(dev, "invalid port number %d\n", port); + return -EINVAL; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof(*props)); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) { + mlx5_ib_warn(dev, "err %d\n", err); + goto out; + } + + + props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); + props->gid_tbl_len = out_mad->data[50]; + props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg; + props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = 16; /* FDR */ + break; + case 2: + props->active_speed = 32; /* EDR */ + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == 4) { + if (dev->mdev.caps.ext_port_cap[port - 1] & + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = 8; + } + } + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +struct mlx5_reg_node_desc { + u8 desc[64]; +}; + +static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_reg_node_desc in; + struct mlx5_reg_node_desc out; + int err; + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + memcpy(&in, props->node_desc, 64); + err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_NODE_DESC, 0, 1); + if (err) + return err; + + memcpy(ibdev->node_desc, props->node_desc, 64); + + return err; +} + +static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_port_attr attr; + u32 tmp; + int err; + + mutex_lock(&dev->cap_mask_mutex); + + err = mlx5_ib_query_port(ibdev, port, &attr); + if (err) + goto out; + + tmp = (attr.port_cap_flags | props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mlx5_set_port_caps(&dev->mdev, port, tmp); + +out: + mutex_unlock(&dev->cap_mask_mutex); + return err; +} + +static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_alloc_ucontext_req req; + struct mlx5_ib_alloc_ucontext_resp resp; + struct mlx5_ib_ucontext *context; + struct mlx5_uuar_info *uuari; + struct mlx5_uar *uars; + int num_uars; + int uuarn; + int err; + int i; + + if (!dev->ib_active) + return ERR_PTR(-EAGAIN); + + err = ib_copy_from_udata(&req, udata, sizeof(req)); + if (err) + return ERR_PTR(err); + + if (req.total_num_uuars > MLX5_MAX_UUARS) + return ERR_PTR(-ENOMEM); + + if (req.total_num_uuars == 0) + return ERR_PTR(-EINVAL); + + req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE); + if (req.num_low_latency_uuars > req.total_num_uuars - 1) + return ERR_PTR(-EINVAL); + + num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE; + resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; + resp.bf_reg_size = dev->mdev.caps.bf_reg_size; + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz; + resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz; + resp.max_send_wqebb = dev->mdev.caps.max_wqes; + resp.max_recv_wr = dev->mdev.caps.max_wqes; + resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + uuari = &context->uuari; + mutex_init(&uuari->lock); + uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL); + if (!uars) { + err = -ENOMEM; + goto out_ctx; + } + + uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars), + sizeof(*uuari->bitmap), + GFP_KERNEL); + if (!uuari->bitmap) { + err = -ENOMEM; + goto out_uar_ctx; + } + /* + * clear all fast path uuars + */ + for (i = 0; i < req.total_num_uuars; i++) { + uuarn = i & 3; + if (uuarn == 2 || uuarn == 3) + set_bit(i, uuari->bitmap); + } + + uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL); + if (!uuari->count) { + err = -ENOMEM; + goto out_bitmap; + } + + for (i = 0; i < num_uars; i++) { + err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index); + if (err) + goto out_count; + } + + INIT_LIST_HEAD(&context->db_page_list); + mutex_init(&context->db_page_mutex); + + resp.tot_uuars = req.total_num_uuars; + resp.num_ports = dev->mdev.caps.num_ports; + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) + goto out_uars; + + uuari->num_low_latency_uuars = req.num_low_latency_uuars; + uuari->uars = uars; + uuari->num_uars = num_uars; + return &context->ibucontext; + +out_uars: + for (i--; i >= 0; i--) + mlx5_cmd_free_uar(&dev->mdev, uars[i].index); +out_count: + kfree(uuari->count); + +out_bitmap: + kfree(uuari->bitmap); + +out_uar_ctx: + kfree(uars); + +out_ctx: + kfree(context); + return ERR_PTR(err); +} + +static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); + struct mlx5_uuar_info *uuari = &context->uuari; + int i; + + for (i = 0; i < uuari->num_uars; i++) { + if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index)) + mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); + } + + kfree(uuari->count); + kfree(uuari->bitmap); + kfree(uuari->uars); + kfree(context); + + return 0; +} + +static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) +{ + return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index; +} + +static int get_command(unsigned long offset) +{ + return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; +} + +static int get_arg(unsigned long offset) +{ + return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); +} + +static int get_index(unsigned long offset) +{ + return get_arg(offset); +} + +static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); + struct mlx5_uuar_info *uuari = &context->uuari; + unsigned long command; + unsigned long idx; + phys_addr_t pfn; + + command = get_command(vma->vm_pgoff); + switch (command) { + case MLX5_IB_MMAP_REGULAR_PAGE: + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + idx = get_index(vma->vm_pgoff); + pfn = uar_index2pfn(dev, uuari->uars[idx].index); + mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, + (unsigned long long)pfn); + + if (idx >= uuari->num_uars) + return -EINVAL; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n", + vma->vm_start, + (unsigned long long)pfn << PAGE_SHIFT); + break; + + case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: + return -ENOSYS; + + default: + return -EINVAL; + } + + return 0; +} + +static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) +{ + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *seg; + struct mlx5_core_mr mr; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + seg = &in->seg; + seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; + seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->start_addr = 0; + + err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in)); + if (err) { + mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); + goto err_in; + } + + kfree(in); + *key = mr.key; + + return 0; + +err_in: + kfree(in); + + return err; +} + +static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) +{ + struct mlx5_core_mr mr; + int err; + + memset(&mr, 0, sizeof(mr)); + mr.key = key; + err = mlx5_core_destroy_mkey(&dev->mdev, &mr); + if (err) + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); +} + +static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_ib_alloc_pd_resp resp; + struct mlx5_ib_pd *pd; + int err; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + resp.pdn = pd->pdn; + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } else { + err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); + if (err) { + mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + kfree(pd); + return ERR_PTR(err); + } + } + + return &pd->ibpd; +} + +static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +{ + struct mlx5_ib_dev *mdev = to_mdev(pd->device); + struct mlx5_ib_pd *mpd = to_mpd(pd); + + if (!pd->uobject) + free_pa_mkey(mdev, mpd->pa_lkey); + + mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn); + kfree(mpd); + + return 0; +} + +static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int err; + + err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num); + if (err) + mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", + ibqp->qp_num, gid->raw); + + return err; +} + +static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int err; + + err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num); + if (err) + mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", + ibqp->qp_num, gid->raw); + + return err; +} + +static int init_node_data(struct mlx5_ib_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + + return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages); +} + +static ssize_t show_reg_pages(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + + return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "MT%d\n", dev->mdev.pdev->device); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev), + fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev)); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%x\n", dev->mdev.rev_id); +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev.board_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); +static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); +static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); + +static struct device_attribute *mlx5_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id, + &dev_attr_fw_pages, + &dev_attr_reg_pages, +}; + +static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + void *data) +{ + struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev); + struct ib_event ibev; + u8 port = 0; + + switch (event) { + case MLX5_DEV_EVENT_SYS_ERROR: + ibdev->ib_active = false; + ibev.event = IB_EVENT_DEVICE_FATAL; + break; + + case MLX5_DEV_EVENT_PORT_UP: + ibev.event = IB_EVENT_PORT_ACTIVE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PORT_DOWN: + ibev.event = IB_EVENT_PORT_ERR; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PORT_INITIALIZED: + /* not used by ULPs */ + return; + + case MLX5_DEV_EVENT_LID_CHANGE: + ibev.event = IB_EVENT_LID_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PKEY_CHANGE: + ibev.event = IB_EVENT_PKEY_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_GUID_CHANGE: + ibev.event = IB_EVENT_GID_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_CLIENT_REREG: + ibev.event = IB_EVENT_CLIENT_REREGISTER; + port = *(u8 *)data; + break; + } + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + + if (ibdev->ib_active) + ib_dispatch_event(&ibev); +} + +static void get_ext_port_caps(struct mlx5_ib_dev *dev) +{ + int port; + + for (port = 1; port <= dev->mdev.caps.num_ports; port++) + mlx5_query_ext_port_caps(dev, port); +} + +static int get_port_caps(struct mlx5_ib_dev *dev) +{ + struct ib_device_attr *dprops = NULL; + struct ib_port_attr *pprops = NULL; + int err = 0; + int port; + + pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); + if (!pprops) + goto out; + + dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); + if (!dprops) + goto out; + + err = mlx5_ib_query_device(&dev->ib_dev, dprops); + if (err) { + mlx5_ib_warn(dev, "query_device failed %d\n", err); + goto out; + } + + for (port = 1; port <= dev->mdev.caps.num_ports; port++) { + err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); + if (err) { + mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + break; + } + dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys; + dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; + mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", + dprops->max_pkeys, pprops->gid_tbl_len); + } + +out: + kfree(pprops); + kfree(dprops); + + return err; +} + +static void destroy_umrc_res(struct mlx5_ib_dev *dev) +{ + int err; + + err = mlx5_mr_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + mlx5_ib_destroy_qp(dev->umrc.qp); + ib_destroy_cq(dev->umrc.cq); + ib_dereg_mr(dev->umrc.mr); + ib_dealloc_pd(dev->umrc.pd); +} + +enum { + MAX_UMR_WR = 128, +}; + +static int create_umr_res(struct mlx5_ib_dev *dev) +{ + struct ib_qp_init_attr *init_attr = NULL; + struct ib_qp_attr *attr = NULL; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_mr *mr; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto error_0; + } + + pd = ib_alloc_pd(&dev->ib_dev); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + ret = PTR_ERR(pd); + goto error_0; + } + + mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mr)) { + mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n"); + ret = PTR_ERR(mr); + goto error_1; + } + + cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128, + 0); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.mr = mr; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp); + +error_3: + ib_destroy_cq(cq); + +error_2: + ib_dereg_mr(mr); + +error_1: + ib_dealloc_pd(pd); + +error_0: + kfree(attr); + kfree(init_attr); + return ret; +} + +static int create_dev_resources(struct mlx5_ib_resources *devr) +{ + struct ib_srq_init_attr attr; + struct mlx5_ib_dev *dev; + int ret = 0; + + dev = container_of(devr, struct mlx5_ib_dev, devr); + + devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->p0)) { + ret = PTR_ERR(devr->p0); + goto error0; + } + devr->p0->device = &dev->ib_dev; + devr->p0->uobject = NULL; + atomic_set(&devr->p0->usecnt, 0); + + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL); + if (IS_ERR(devr->c0)) { + ret = PTR_ERR(devr->c0); + goto error1; + } + devr->c0->device = &dev->ib_dev; + devr->c0->uobject = NULL; + devr->c0->comp_handler = NULL; + devr->c0->event_handler = NULL; + devr->c0->cq_context = NULL; + atomic_set(&devr->c0->usecnt, 0); + + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->x0)) { + ret = PTR_ERR(devr->x0); + goto error2; + } + devr->x0->device = &dev->ib_dev; + devr->x0->inode = NULL; + atomic_set(&devr->x0->usecnt, 0); + mutex_init(&devr->x0->tgt_qp_mutex); + INIT_LIST_HEAD(&devr->x0->tgt_qp_list); + + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->x1)) { + ret = PTR_ERR(devr->x1); + goto error3; + } + devr->x1->device = &dev->ib_dev; + devr->x1->inode = NULL; + atomic_set(&devr->x1->usecnt, 0); + mutex_init(&devr->x1->tgt_qp_mutex); + INIT_LIST_HEAD(&devr->x1->tgt_qp_list); + + memset(&attr, 0, sizeof(attr)); + attr.attr.max_sge = 1; + attr.attr.max_wr = 1; + attr.srq_type = IB_SRQT_XRC; + attr.ext.xrc.cq = devr->c0; + attr.ext.xrc.xrcd = devr->x0; + + devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); + if (IS_ERR(devr->s0)) { + ret = PTR_ERR(devr->s0); + goto error4; + } + devr->s0->device = &dev->ib_dev; + devr->s0->pd = devr->p0; + devr->s0->uobject = NULL; + devr->s0->event_handler = NULL; + devr->s0->srq_context = NULL; + devr->s0->srq_type = IB_SRQT_XRC; + devr->s0->ext.xrc.xrcd = devr->x0; + devr->s0->ext.xrc.cq = devr->c0; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); + atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->p0->usecnt); + atomic_set(&devr->s0->usecnt, 0); + + return 0; + +error4: + mlx5_ib_dealloc_xrcd(devr->x1); +error3: + mlx5_ib_dealloc_xrcd(devr->x0); +error2: + mlx5_ib_destroy_cq(devr->c0); +error1: + mlx5_ib_dealloc_pd(devr->p0); +error0: + return ret; +} + +static void destroy_dev_resources(struct mlx5_ib_resources *devr) +{ + mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_dealloc_pd(devr->p0); +} + +static int init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *dev; + int err; + int i; + + printk_once(KERN_INFO "%s", mlx5_version); + + dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) + return -ENOMEM; + + mdev = &dev->mdev; + mdev->event = mlx5_ib_event; + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("selected pofile out of range, selceting default\n"); + prof_sel = 0; + } + mdev->profile = &profile[prof_sel]; + err = mlx5_dev_init(mdev, pdev); + if (err) + goto err_free; + + err = get_port_caps(dev); + if (err) + goto err_cleanup; + + get_ext_port_caps(dev); + + err = alloc_comp_eqs(dev); + if (err) + goto err_cleanup; + + MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); + + strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey; + dev->num_ports = mdev->caps.num_ports; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.num_comp_vectors = dev->num_comp_vectors; + dev->ib_dev.dma_device = &mdev->pdev->dev; + + dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); + + dev->ib_dev.query_device = mlx5_ib_query_device; + dev->ib_dev.query_port = mlx5_ib_query_port; + dev->ib_dev.query_gid = mlx5_ib_query_gid; + dev->ib_dev.query_pkey = mlx5_ib_query_pkey; + dev->ib_dev.modify_device = mlx5_ib_modify_device; + dev->ib_dev.modify_port = mlx5_ib_modify_port; + dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; + dev->ib_dev.mmap = mlx5_ib_mmap; + dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; + dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; + dev->ib_dev.create_ah = mlx5_ib_create_ah; + dev->ib_dev.query_ah = mlx5_ib_query_ah; + dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; + dev->ib_dev.create_srq = mlx5_ib_create_srq; + dev->ib_dev.modify_srq = mlx5_ib_modify_srq; + dev->ib_dev.query_srq = mlx5_ib_query_srq; + dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; + dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; + dev->ib_dev.create_qp = mlx5_ib_create_qp; + dev->ib_dev.modify_qp = mlx5_ib_modify_qp; + dev->ib_dev.query_qp = mlx5_ib_query_qp; + dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.post_send = mlx5_ib_post_send; + dev->ib_dev.post_recv = mlx5_ib_post_recv; + dev->ib_dev.create_cq = mlx5_ib_create_cq; + dev->ib_dev.modify_cq = mlx5_ib_modify_cq; + dev->ib_dev.resize_cq = mlx5_ib_resize_cq; + dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; + dev->ib_dev.poll_cq = mlx5_ib_poll_cq; + dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; + dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; + dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; + dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; + dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; + dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; + dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; + dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + + if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { + dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; + dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + + err = init_node_data(dev); + if (err) + goto err_eqs; + + mutex_init(&dev->cap_mask_mutex); + spin_lock_init(&dev->mr_lock); + + err = create_dev_resources(&dev->devr); + if (err) + goto err_eqs; + + if (ib_register_device(&dev->ib_dev, NULL)) + goto err_rsrc; + + err = create_umr_res(dev); + if (err) + goto err_dev; + + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { + if (device_create_file(&dev->ib_dev.dev, + mlx5_class_attributes[i])) + goto err_umrc; + } + + dev->ib_active = true; + + return 0; + +err_umrc: + destroy_umrc_res(dev); + +err_dev: + ib_unregister_device(&dev->ib_dev); + +err_rsrc: + destroy_dev_resources(&dev->devr); + +err_eqs: + free_comp_eqs(dev); + +err_cleanup: + mlx5_dev_cleanup(mdev); + +err_free: + ib_dealloc_device((struct ib_device *)dev); + + return err; +} + +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev); + + destroy_umrc_res(dev); + ib_unregister_device(&dev->ib_dev); + destroy_dev_resources(&dev->devr); + free_comp_eqs(dev); + mlx5_dev_cleanup(&dev->mdev); + ib_dealloc_device(&dev->ib_dev); +} + +static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = { + { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table); + +static struct pci_driver mlx5_ib_driver = { + .name = DRIVER_NAME, + .id_table = mlx5_ib_pci_table, + .probe = init_one, + .remove = remove_one +}; + +static int __init mlx5_ib_init(void) +{ + return pci_register_driver(&mlx5_ib_driver); +} + +static void __exit mlx5_ib_cleanup(void) +{ + pci_unregister_driver(&mlx5_ib_driver); +} + +module_init(mlx5_ib_init); +module_exit(mlx5_ib_cleanup); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c new file mode 100644 index 000000000000..3a5322870b96 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_ib.h" + +/* @umem: umem object to scan + * @addr: ib virtual address requested by the user + * @count: number of PAGE_SIZE pages covered by umem + * @shift: page shift for the compound pages found in the region + * @ncont: number of compund pages + * @order: log2 of the number of compound pages + */ +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, + int *ncont, int *order) +{ + struct ib_umem_chunk *chunk; + unsigned long tmp; + unsigned long m; + int i, j, k; + u64 base = 0; + int p = 0; + int skip; + int mask; + u64 len; + u64 pfn; + + addr = addr >> PAGE_SHIFT; + tmp = (unsigned long)addr; + m = find_first_bit(&tmp, sizeof(tmp)); + skip = 1 << m; + mask = skip - 1; + i = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + for (j = 0; j < chunk->nmap; j++) { + len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; + pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } + } + p++; + i++; + } + } + + if (i) { + m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); + + if (order) + *order = ilog2(roundup_pow_of_two(i) >> m); + + *ncont = DIV_ROUND_UP(i, (1 << m)); + } else { + m = 0; + + if (order) + *order = 0; + + *ncont = 0; + } + *shift = PAGE_SHIFT + m; + *count = i; +} + +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int umr) +{ + int shift = page_shift - PAGE_SHIFT; + int mask = (1 << shift) - 1; + struct ib_umem_chunk *chunk; + int i, j, k; + u64 cur = 0; + u64 base; + int len; + + i = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + for (j = 0; j < chunk->nmap; j++) { + len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; + base = sg_dma_address(&chunk->page_list[j]); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; + + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; + } + } +} + +int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) +{ + u64 page_size; + u64 page_mask; + u64 off_size; + u64 off_mask; + u64 buf_off; + + page_size = 1 << page_shift; + page_mask = page_size - 1; + buf_off = addr & page_mask; + off_size = page_size >> 6; + off_mask = off_size - 1; + + if (buf_off & off_mask) + return -EINVAL; + + *offset = buf_off >> ilog2(off_size); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h new file mode 100644 index 000000000000..836be9157242 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_H +#define MLX5_IB_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define mlx5_ib_dbg(dev, format, arg...) \ +pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +#define mlx5_ib_err(dev, format, arg...) \ +pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +#define mlx5_ib_warn(dev, format, arg...) \ +pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +enum { + MLX5_IB_MMAP_CMD_SHIFT = 8, + MLX5_IB_MMAP_CMD_MASK = 0xff, +}; + +enum mlx5_ib_mmap_cmd { + MLX5_IB_MMAP_REGULAR_PAGE = 0, + MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */ +}; + +enum { + MLX5_RES_SCAT_DATA32_CQE = 0x1, + MLX5_RES_SCAT_DATA64_CQE = 0x2, + MLX5_REQ_SCAT_DATA32_CQE = 0x11, + MLX5_REQ_SCAT_DATA64_CQE = 0x22, +}; + +enum mlx5_ib_latency_class { + MLX5_IB_LATENCY_CLASS_LOW, + MLX5_IB_LATENCY_CLASS_MEDIUM, + MLX5_IB_LATENCY_CLASS_HIGH, + MLX5_IB_LATENCY_CLASS_FAST_PATH +}; + +enum mlx5_ib_mad_ifc_flags { + MLX5_MAD_IFC_IGNORE_MKEY = 1, + MLX5_MAD_IFC_IGNORE_BKEY = 2, + MLX5_MAD_IFC_NET_VIEW = 4, +}; + +struct mlx5_ib_ucontext { + struct ib_ucontext ibucontext; + struct list_head db_page_list; + + /* protect doorbell record alloc/free + */ + struct mutex db_page_mutex; + struct mlx5_uuar_info uuari; +}; + +static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext); +} + +struct mlx5_ib_pd { + struct ib_pd ibpd; + u32 pdn; + u32 pa_lkey; +}; + +/* Use macros here so that don't have to duplicate + * enum ib_send_flags and enum ib_qp_type for low-level driver + */ + +#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START +#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 +#define MLX5_IB_WR_UMR IB_WR_RESERVED1 + +struct wr_list { + u16 opcode; + u16 next; +}; + +struct mlx5_ib_wq { + u64 *wrid; + u32 *wr_data; + struct wr_list *w_list; + unsigned *wqe_head; + u16 unsig_count; + + /* serialize post to the work queue + */ + spinlock_t lock; + int wqe_cnt; + int max_post; + int max_gs; + int offset; + int wqe_shift; + unsigned head; + unsigned tail; + u16 cur_post; + u16 last_poll; + void *qend; +}; + +enum { + MLX5_QP_USER, + MLX5_QP_KERNEL, + MLX5_QP_EMPTY +}; + +struct mlx5_ib_qp { + struct ib_qp ibqp; + struct mlx5_core_qp mqp; + struct mlx5_buf buf; + + struct mlx5_db db; + struct mlx5_ib_wq rq; + + u32 doorbell_qpn; + u8 sq_signal_bits; + u8 fm_cache; + int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct mlx5_ib_wq sq; + + struct ib_umem *umem; + int buf_size; + + /* serialize qp state modifications + */ + struct mutex mutex; + u16 xrcdn; + u32 flags; + u8 port; + u8 alt_port; + u8 atomic_rd_en; + u8 resp_depth; + u8 state; + int mlx_type; + int wq_sig; + int scat_cqe; + int max_inline_data; + struct mlx5_bf *bf; + int has_rq; + + /* only for user space QPs. For kernel + * we have it from the bf object + */ + int uuarn; + + int create_type; + u32 pa_lkey; +}; + +struct mlx5_ib_cq_buf { + struct mlx5_buf buf; + struct ib_umem *umem; + int cqe_size; +}; + +enum mlx5_ib_qp_flags { + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, +}; + +struct mlx5_shared_mr_info { + int mr_id; + struct ib_umem *umem; +}; + +struct mlx5_ib_cq { + struct ib_cq ibcq; + struct mlx5_core_cq mcq; + struct mlx5_ib_cq_buf buf; + struct mlx5_db db; + + /* serialize access to the CQ + */ + spinlock_t lock; + + /* protect resize cq + */ + struct mutex resize_mutex; + struct mlx5_ib_cq_resize *resize_buf; + struct ib_umem *resize_umem; + int cqe_size; +}; + +struct mlx5_ib_srq { + struct ib_srq ibsrq; + struct mlx5_core_srq msrq; + struct mlx5_buf buf; + struct mlx5_db db; + u64 *wrid; + /* protect SRQ hanlding + */ + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct ib_umem *umem; + /* serialize arming a SRQ + */ + struct mutex mutex; + int wq_sig; +}; + +struct mlx5_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; +}; + +struct mlx5_ib_mr { + struct ib_mr ibmr; + struct mlx5_core_mr mmr; + struct ib_umem *umem; + struct mlx5_shared_mr_info *smr_info; + struct list_head list; + int order; + int umred; + __be64 *pas; + dma_addr_t dma; + int npages; + struct completion done; + enum ib_wc_status status; +}; + +struct mlx5_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + __be64 *mapped_page_list; + dma_addr_t map; +}; + +struct umr_common { + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_mr *mr; + /* control access to UMR QP + */ + struct semaphore sem; +}; + +enum { + MLX5_FMR_INVALID, + MLX5_FMR_VALID, + MLX5_FMR_BUSY, +}; + +struct mlx5_ib_fmr { + struct ib_fmr ibfmr; + struct mlx5_core_mr mr; + int access_flags; + int state; + /* protect fmr state + */ + spinlock_t lock; + u64 wrid; + struct ib_send_wr wr[2]; + u8 page_shift; + struct ib_fast_reg_page_list page_list; +}; + +struct mlx5_cache_ent { + struct list_head head; + /* sync access to the cahce entry + */ + spinlock_t lock; + + + struct dentry *dir; + char name[4]; + u32 order; + u32 size; + u32 cur; + u32 miss; + u32 limit; + + struct dentry *fsize; + struct dentry *fcur; + struct dentry *fmiss; + struct dentry *flimit; + + struct mlx5_ib_dev *dev; + struct work_struct work; + struct delayed_work dwork; +}; + +struct mlx5_mr_cache { + struct workqueue_struct *wq; + struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; + int stopped; + struct dentry *root; + unsigned long last_add; +}; + +struct mlx5_ib_resources { + struct ib_cq *c0; + struct ib_xrcd *x0; + struct ib_xrcd *x1; + struct ib_pd *p0; + struct ib_srq *s0; +}; + +struct mlx5_ib_dev { + struct ib_device ib_dev; + struct mlx5_core_dev mdev; + MLX5_DECLARE_DOORBELL_LOCK(uar_lock); + struct list_head eqs_list; + int num_ports; + int num_comp_vectors; + /* serialize update of capability mask + */ + struct mutex cap_mask_mutex; + bool ib_active; + struct umr_common umrc; + /* sync used page count stats + */ + spinlock_t mr_lock; + struct mlx5_ib_resources devr; + struct mlx5_mr_cache cache; +}; + +static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) +{ + return container_of(mcq, struct mlx5_ib_cq, mcq); +} + +static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd); +} + +static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mlx5_ib_dev, ib_dev); +} + +static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr); +} + +static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mlx5_ib_cq, ibcq); +} + +static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) +{ + return container_of(mqp, struct mlx5_ib_qp, mqp); +} + +static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mlx5_ib_pd, ibpd); +} + +static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mlx5_ib_srq, ibsrq); +} + +static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mlx5_ib_qp, ibqp); +} + +static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) +{ + return container_of(msrq, struct mlx5_ib_srq, msrq); +} + +static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mlx5_ib_mr, ibmr); +} + +static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl); +} + +struct mlx5_ib_ah { + struct ib_ah ibah; + struct mlx5_av av; +}; + +static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx5_ib_ah, ibah); +} + +static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev) +{ + return container_of(dev, struct mlx5_ib_dev, mdev); +} + +static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev) +{ + return mlx5_core2ibdev(pci2mlx5_core_dev(pdev)); +} + +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, + struct mlx5_db *db); +void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); +void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); +void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); +void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); +int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad); +struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, + struct mlx5_ib_ah *ah); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int mlx5_ib_destroy_ah(struct ib_ah *ah); +struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); +int mlx5_ib_destroy_srq(struct ib_srq *srq); +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata); +int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); +struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); +struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc, + struct ib_fmr_attr *fmr_attr); +int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int npages, u64 iova); +int mlx5_ib_unmap_fmr(struct list_head *fmr_list); +int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn); +int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); +void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, + int *ncont, int *order); +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int umr); +void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); +int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); +int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); +int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); +int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); +void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); + +static inline void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +static inline u8 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ; +} + +#endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c new file mode 100644 index 000000000000..e2daa8f02476 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -0,0 +1,1007 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include +#include +#include +#include +#include +#include "mlx5_ib.h" + +enum { + DEF_CACHE_SIZE = 10, +}; + +static __be64 *mr_align(__be64 *ptr, int align) +{ + unsigned long mask = align - 1; + + return (__be64 *)(((unsigned long)ptr + mask) & ~mask); +} + +static int order2idx(struct mlx5_ib_dev *dev, int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + + if (order < cache->ent[0].order) + return 0; + else + return order - cache->ent[0].order; +} + +static int add_keys(struct mlx5_ib_dev *dev, int c, int num) +{ + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int npages = 1 << ent->order; + int size = sizeof(u64) * npages; + int err = 0; + int i; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + for (i = 0; i < num; i++) { + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + goto out; + } + mr->order = ent->order; + mr->umred = 1; + mr->pas = kmalloc(size + 0x3f, GFP_KERNEL); + if (!mr->pas) { + kfree(mr); + err = -ENOMEM; + goto out; + } + mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->dma)) { + kfree(mr->pas); + kfree(mr); + err = -ENOMEM; + goto out; + } + + in->seg.status = 1 << 6; + in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; + in->seg.log2_page_size = 12; + + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, + sizeof(*in)); + if (err) { + mlx5_ib_warn(dev, "create mkey failed %d\n", err); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + kfree(mr); + goto out; + } + cache->last_add = jiffies; + + spin_lock(&ent->lock); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + ent->size++; + spin_unlock(&ent->lock); + } + +out: + kfree(in); + return err; +} + +static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) +{ + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *mr; + int size; + int err; + int i; + + for (i = 0; i < num; i++) { + spin_lock(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock(&ent->lock); + return; + } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->cur--; + ent->size--; + spin_unlock(&ent->lock); + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed destroy mkey\n"); + } else { + size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + kfree(mr); + } + } +} + +static ssize_t size_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + struct mlx5_ib_dev *dev = ent->dev; + char lbuf[20]; + u32 var; + int err; + int c; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EPERM; + + c = order2idx(dev, ent->order); + lbuf[sizeof(lbuf) - 1] = 0; + + if (sscanf(lbuf, "%u", &var) != 1) + return -EINVAL; + + if (var < ent->limit) + return -EINVAL; + + if (var > ent->size) { + err = add_keys(dev, c, var - ent->size); + if (err) + return err; + } else if (var < ent->size) { + remove_keys(dev, c, ent->size - var); + } + + return count; +} + +static ssize_t size_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + char lbuf[20]; + int err; + + if (*pos) + return 0; + + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); + if (err < 0) + return err; + + if (copy_to_user(buf, lbuf, err)) + return -EPERM; + + *pos += err; + + return err; +} + +static const struct file_operations size_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = size_write, + .read = size_read, +}; + +static ssize_t limit_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + struct mlx5_ib_dev *dev = ent->dev; + char lbuf[20]; + u32 var; + int err; + int c; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EPERM; + + c = order2idx(dev, ent->order); + lbuf[sizeof(lbuf) - 1] = 0; + + if (sscanf(lbuf, "%u", &var) != 1) + return -EINVAL; + + if (var > ent->size) + return -EINVAL; + + ent->limit = var; + + if (ent->cur < ent->limit) { + err = add_keys(dev, c, 2 * ent->limit - ent->cur); + if (err) + return err; + } + + return count; +} + +static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + char lbuf[20]; + int err; + + if (*pos) + return 0; + + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); + if (err < 0) + return err; + + if (copy_to_user(buf, lbuf, err)) + return -EPERM; + + *pos += err; + + return err; +} + +static const struct file_operations limit_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = limit_write, + .read = limit_read, +}; + +static int someone_adding(struct mlx5_mr_cache *cache) +{ + int i; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + if (cache->ent[i].cur < cache->ent[i].limit) + return 1; + } + + return 0; +} + +static void __cache_work_func(struct mlx5_cache_ent *ent) +{ + struct mlx5_ib_dev *dev = ent->dev; + struct mlx5_mr_cache *cache = &dev->cache; + int i = order2idx(dev, ent->order); + + if (cache->stopped) + return; + + ent = &dev->cache.ent[i]; + if (ent->cur < 2 * ent->limit) { + add_keys(dev, i, 1); + if (ent->cur < 2 * ent->limit) + queue_work(cache->wq, &ent->work); + } else if (ent->cur > 2 * ent->limit) { + if (!someone_adding(cache) && + time_after(jiffies, cache->last_add + 60 * HZ)) { + remove_keys(dev, i, 1); + if (ent->cur > ent->limit) + queue_work(cache->wq, &ent->work); + } else { + queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ); + } + } +} + +static void delayed_cache_work_func(struct work_struct *work) +{ + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, dwork.work); + __cache_work_func(ent); +} + +static void cache_work_func(struct work_struct *work) +{ + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, work); + __cache_work_func(ent); +} + +static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_ib_mr *mr = NULL; + struct mlx5_cache_ent *ent; + int c; + int i; + + c = order2idx(dev, order); + if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); + return NULL; + } + + for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + + mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); + + spin_lock(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, + list); + list_del(&mr->list); + ent->cur--; + spin_unlock(&ent->lock); + if (ent->cur < ent->limit) + queue_work(cache->wq, &ent->work); + break; + } + spin_unlock(&ent->lock); + + queue_work(cache->wq, &ent->work); + + if (mr) + break; + } + + if (!mr) + cache->ent[c].miss++; + + return mr; +} + +static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int shrink = 0; + int c; + + c = order2idx(dev, mr->order); + if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); + return; + } + ent = &cache->ent[c]; + spin_lock(&ent->lock); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + if (ent->cur > 2 * ent->limit) + shrink = 1; + spin_unlock(&ent->lock); + + if (shrink) + queue_work(cache->wq, &ent->work); +} + +static void clean_keys(struct mlx5_ib_dev *dev, int c) +{ + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *mr; + int size; + int err; + + while (1) { + spin_lock(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock(&ent->lock); + return; + } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->cur--; + ent->size--; + spin_unlock(&ent->lock); + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed destroy mkey\n"); + } else { + size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + kfree(mr); + } + } +} + +static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int i; + + if (!mlx5_debugfs_root) + return 0; + + cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root); + if (!cache->root) + return -ENOMEM; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + sprintf(ent->name, "%d", ent->order); + ent->dir = debugfs_create_dir(ent->name, cache->root); + if (!ent->dir) + return -ENOMEM; + + ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, + &size_fops); + if (!ent->fsize) + return -ENOMEM; + + ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, + &limit_fops); + if (!ent->flimit) + return -ENOMEM; + + ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, + &ent->cur); + if (!ent->fcur) + return -ENOMEM; + + ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, + &ent->miss); + if (!ent->fmiss) + return -ENOMEM; + } + + return 0; +} + +static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->cache.root); +} + +int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int limit; + int size; + int err; + int i; + + cache->wq = create_singlethread_workqueue("mkey_cache"); + if (!cache->wq) { + mlx5_ib_warn(dev, "failed to create work queue\n"); + return -ENOMEM; + } + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + INIT_LIST_HEAD(&cache->ent[i].head); + spin_lock_init(&cache->ent[i].lock); + + ent = &cache->ent[i]; + INIT_LIST_HEAD(&ent->head); + spin_lock_init(&ent->lock); + ent->order = i + 2; + ent->dev = dev; + + if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) { + size = dev->mdev.profile->mr_cache[i].size; + limit = dev->mdev.profile->mr_cache[i].limit; + } else { + size = DEF_CACHE_SIZE; + limit = 0; + } + INIT_WORK(&ent->work, cache_work_func); + INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + ent->limit = limit; + queue_work(cache->wq, &ent->work); + } + + err = mlx5_mr_cache_debugfs_init(dev); + if (err) + mlx5_ib_warn(dev, "cache debugfs failure\n"); + + return 0; +} + +int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) +{ + int i; + + dev->cache.stopped = 1; + destroy_workqueue(dev->cache.wq); + + mlx5_mr_cache_debugfs_cleanup(dev); + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) + clean_keys(dev, i); + + return 0; +} + +struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *seg; + struct mlx5_ib_mr *mr; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + seg = &in->seg; + seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; + seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->start_addr = 0; + + err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in)); + if (err) + goto err_in; + + kfree(in); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_in: + kfree(in); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +static int get_octo_len(u64 addr, u64 len, int page_size) +{ + u64 offset; + int npages; + + offset = addr & (page_size - 1); + npages = ALIGN(len + offset, page_size) >> ilog2(page_size); + return (npages + 1) / 2; +} + +static int use_umr(int order) +{ + return order <= 17; +} + +static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift, u64 virt_addr, u64 len, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_mr *mr = dev->umrc.mr; + + sg->addr = dma; + sg->length = ALIGN(sizeof(u64) * n, 64); + sg->lkey = mr->lkey; + + wr->next = NULL; + wr->send_flags = 0; + wr->sg_list = sg; + if (n) + wr->num_sge = 1; + else + wr->num_sge = 0; + + wr->opcode = MLX5_IB_WR_UMR; + wr->wr.fast_reg.page_list_len = n; + wr->wr.fast_reg.page_shift = page_shift; + wr->wr.fast_reg.rkey = key; + wr->wr.fast_reg.iova_start = virt_addr; + wr->wr.fast_reg.length = len; + wr->wr.fast_reg.access_flags = access_flags; + wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd; +} + +static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, + struct ib_send_wr *wr, u32 key) +{ + wr->send_flags = MLX5_IB_SEND_UMR_UNREG; + wr->opcode = MLX5_IB_WR_UMR; + wr->wr.fast_reg.rkey = key; +} + +void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) +{ + struct mlx5_ib_mr *mr; + struct ib_wc wc; + int err; + + while (1) { + err = ib_poll_cq(cq, 1, &wc); + if (err < 0) { + pr_warn("poll cq error %d\n", err); + return; + } + if (err == 0) + break; + + mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id; + mr->status = wc.status; + complete(&mr->done); + } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +} + +static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, + u64 virt_addr, u64 len, int npages, + int page_shift, int order, int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct umr_common *umrc = &dev->umrc; + struct ib_send_wr wr, *bad; + struct mlx5_ib_mr *mr; + struct ib_sge sg; + int err; + int i; + + for (i = 0; i < 10; i++) { + mr = alloc_cached_mr(dev, order); + if (mr) + break; + + err = add_keys(dev, order2idx(dev, order), 1); + if (err) { + mlx5_ib_warn(dev, "add_keys failed\n"); + break; + } + } + + if (!mr) + return ERR_PTR(-EAGAIN); + + mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1); + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)mr; + prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); + + /* We serialize polls so one process does not kidnap another's + * completion. This is not a problem since wr is completed in + * around 1 usec + */ + down(&umrc->sem); + init_completion(&mr->done); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + mlx5_ib_warn(dev, "post send failed, err %d\n", err); + up(&umrc->sem); + goto error; + } + wait_for_completion(&mr->done); + up(&umrc->sem); + + if (mr->status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + goto error; + } + + return mr; + +error: + free_cached_mr(dev, mr); + return ERR_PTR(err); +} + +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, + u64 length, struct ib_umem *umem, + int npages, int page_shift, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int inlen; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err_1; + } + mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0); + + in->seg.flags = convert_access(access_flags) | + MLX5_ACCESS_MODE_MTT; + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.start_addr = cpu_to_be64(virt_addr); + in->seg.len = cpu_to_be64(length); + in->seg.bsfs_octo_size = 0; + in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); + in->seg.log2_page_size = page_shift; + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen); + if (err) { + mlx5_ib_warn(dev, "create mkey failed\n"); + goto err_2; + } + mr->umem = umem; + mlx5_vfree(in); + + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + + return mr; + +err_2: + mlx5_vfree(in); + +err_1: + kfree(mr); + + return ERR_PTR(err); +} + +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem *umem; + int page_shift; + int npages; + int ncont; + int order; + int err; + + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n", + start, virt_addr, length); + umem = ib_umem_get(pd->uobject->context, start, length, access_flags, + 0); + if (IS_ERR(umem)) { + mlx5_ib_dbg(dev, "umem get failed\n"); + return (void *)umem; + } + + mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); + if (!npages) { + mlx5_ib_warn(dev, "avoid zero region\n"); + err = -EINVAL; + goto error; + } + + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", + npages, ncont, order, page_shift); + + if (use_umr(order)) { + mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, + order, access_flags); + if (PTR_ERR(mr) == -EAGAIN) { + mlx5_ib_dbg(dev, "cache empty for order %d", order); + mr = NULL; + } + } + + if (!mr) + mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, + access_flags); + + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto error; + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + + mr->umem = umem; + mr->npages = npages; + spin_lock(&dev->mr_lock); + dev->mdev.priv.reg_pages += npages; + spin_unlock(&dev->mr_lock); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + + return &mr->ibmr; + +error: + ib_umem_release(umem); + return ERR_PTR(err); +} + +static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_send_wr wr, *bad; + int err; + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)mr; + prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + + down(&umrc->sem); + init_completion(&mr->done); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + up(&umrc->sem); + mlx5_ib_dbg(dev, "err %d\n", err); + goto error; + } + wait_for_completion(&mr->done); + up(&umrc->sem); + if (mr->status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "unreg umr failed\n"); + err = -EFAULT; + goto error; + } + return 0; + +error: + return err; +} + +int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct ib_umem *umem = mr->umem; + int npages = mr->npages; + int umred = mr->umred; + int err; + + if (!umred) { + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + } else { + err = unreg_umr(dev, mr); + if (err) { + mlx5_ib_warn(dev, "failed unregister\n"); + return err; + } + free_cached_mr(dev, mr); + } + + if (umem) { + ib_umem_release(umem); + spin_lock(&dev->mr_lock); + dev->mdev.priv.reg_pages -= npages; + spin_unlock(&dev->mr_lock); + } + + if (!umred) + kfree(mr); + + return 0; +} + +struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + /* + * TBD not needed - issue 197292 */ + in->seg.log2_page_size = PAGE_SHIFT; + + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in)); + kfree(in); + if (err) + goto err_free; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof(u64); + + mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->mapped_page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + struct mlx5_ib_dev *dev = to_mdev(page_list->device); + int size = page_list->max_page_list_len * sizeof(u64); + + dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list, + mfrpl->map); + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c new file mode 100644 index 000000000000..16ac54c9819f --- /dev/null +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -0,0 +1,2524 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_ib.h" +#include "user.h" + +/* not supported currently */ +static int wq_signature; + +enum { + MLX5_IB_ACK_REQ_FREQ = 8, +}; + +enum { + MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83, + MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX5_IB_LINK_TYPE_IB = 0, + MLX5_IB_LINK_TYPE_ETH = 1 +}; + +enum { + MLX5_IB_SQ_STRIDE = 6, + MLX5_IB_CACHE_LINE_SIZE = 64, +}; + +static const u32 mlx5_ib_opcode[] = { + [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, + [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, + [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, + [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, + [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, +}; + +struct umr_wr { + u64 virt_addr; + struct ib_pd *pd; + unsigned int page_shift; + unsigned int npages; + u32 length; + int access_flags; + u32 mkey; +}; + +static int is_qp0(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_SMI; +} + +static int is_qp1(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_GSI; +} + +static int is_sqp(enum ib_qp_type qp_type) +{ + return is_qp0(qp_type) || is_qp1(qp_type); +} + +static void *get_wqe(struct mlx5_ib_qp *qp, int offset) +{ + return mlx5_buf_offset(&qp->buf, offset); +} + +static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); +} + +void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); +} + +static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) +{ + struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; + struct ib_event event; + + if (type == MLX5_EVENT_TYPE_PATH_MIG) + to_mibqp(qp)->port = to_mibqp(qp)->alt_port; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case MLX5_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case MLX5_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case MLX5_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn); + return; + } + + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, + int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) +{ + int wqe_size; + int wq_size; + + /* Sanity check RQ size before proceeding */ + if (cap->max_recv_wr > dev->mdev.caps.max_wqes) + return -EINVAL; + + if (!has_rq) { + qp->rq.max_gs = 0; + qp->rq.wqe_cnt = 0; + qp->rq.wqe_shift = 0; + } else { + if (ucmd) { + qp->rq.wqe_cnt = ucmd->rq_wqe_count; + qp->rq.wqe_shift = ucmd->rq_wqe_shift; + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_post = qp->rq.wqe_cnt; + } else { + wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0; + wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg); + wqe_size = roundup_pow_of_two(wqe_size); + wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; + wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); + qp->rq.wqe_cnt = wq_size / wqe_size; + if (wqe_size > dev->mdev.caps.max_rq_desc_sz) { + mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", + wqe_size, + dev->mdev.caps.max_rq_desc_sz); + return -EINVAL; + } + qp->rq.wqe_shift = ilog2(wqe_size); + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_post = qp->rq.wqe_cnt; + } + } + + return 0; +} + +static int sq_overhead(enum ib_qp_type qp_type) +{ + int size; + + switch (qp_type) { + case IB_QPT_XRC_INI: + size = sizeof(struct mlx5_wqe_xrc_seg); + /* fall through */ + case IB_QPT_RC: + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + break; + + case IB_QPT_UC: + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_datagram_seg); + break; + + case MLX5_IB_QPT_REG_UMR: + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg); + break; + + default: + return -EINVAL; + } + + return size; +} + +static int calc_send_wqe(struct ib_qp_init_attr *attr) +{ + int inl_size = 0; + int size; + + size = sq_overhead(attr->qp_type); + if (size < 0) + return size; + + if (attr->cap.max_inline_data) { + inl_size = size + sizeof(struct mlx5_wqe_inline_seg) + + attr->cap.max_inline_data; + } + + size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); + + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); +} + +static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, + struct mlx5_ib_qp *qp) +{ + int wqe_size; + int wq_size; + + if (!attr->cap.max_send_wr) + return 0; + + wqe_size = calc_send_wqe(attr); + mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size); + if (wqe_size < 0) + return wqe_size; + + if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { + mlx5_ib_dbg(dev, "\n"); + return -EINVAL; + } + + qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - + sizeof(struct mlx5_wqe_inline_seg); + attr->cap.max_inline_data = qp->max_inline_data; + + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); + qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_post = 1 << ilog2(wq_size / wqe_size); + + return wq_size; +} + +static int set_user_buf_size(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + struct mlx5_ib_create_qp *ucmd) +{ + int desc_sz = 1 << qp->sq.wqe_shift; + + if (desc_sz > dev->mdev.caps.max_sq_desc_sz) { + mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", + desc_sz, dev->mdev.caps.max_sq_desc_sz); + return -EINVAL; + } + + if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) { + mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n", + ucmd->sq_wqe_count, ucmd->sq_wqe_count); + return -EINVAL; + } + + qp->sq.wqe_cnt = ucmd->sq_wqe_count; + + if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", + qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + return -EINVAL; + } + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << 6); + + return 0; +} + +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || + attr->qp_type == IB_QPT_XRC_TGT || attr->srq || + attr->qp_type == MLX5_IB_QPT_REG_UMR || + !attr->cap.max_recv_wr) + return 0; + + return 1; +} + +static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int start_uuar; + int i; + + start_uuar = nuuars - uuari->num_low_latency_uuars; + for (i = start_uuar; i < nuuars; i++) { + if (!test_bit(i, uuari->bitmap)) { + set_bit(i, uuari->bitmap); + uuari->count[i]++; + return i; + } + } + + return -ENOMEM; +} + +static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int minidx = 1; + int uuarn; + int end; + int i; + + end = nuuars - uuari->num_low_latency_uuars; + + for (i = 1; i < end; i++) { + uuarn = i & 3; + if (uuarn == 2 || uuarn == 3) + continue; + + if (uuari->count[i] < uuari->count[minidx]) + minidx = i; + } + + uuari->count[minidx]++; + return minidx; +} + +static int alloc_uuar(struct mlx5_uuar_info *uuari, + enum mlx5_ib_latency_class lat) +{ + int uuarn = -EINVAL; + + mutex_lock(&uuari->lock); + switch (lat) { + case MLX5_IB_LATENCY_CLASS_LOW: + uuarn = 0; + uuari->count[uuarn]++; + break; + + case MLX5_IB_LATENCY_CLASS_MEDIUM: + uuarn = alloc_med_class_uuar(uuari); + break; + + case MLX5_IB_LATENCY_CLASS_HIGH: + uuarn = alloc_high_class_uuar(uuari); + break; + + case MLX5_IB_LATENCY_CLASS_FAST_PATH: + uuarn = 2; + break; + } + mutex_unlock(&uuari->lock); + + return uuarn; +} + +static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + clear_bit(uuarn, uuari->bitmap); + --uuari->count[uuarn]; +} + +static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + clear_bit(uuarn, uuari->bitmap); + --uuari->count[uuarn]; +} + +static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int high_uuar = nuuars - uuari->num_low_latency_uuars; + + mutex_lock(&uuari->lock); + if (uuarn == 0) { + --uuari->count[uuarn]; + goto out; + } + + if (uuarn < high_uuar) { + free_med_class_uuar(uuari, uuarn); + goto out; + } + + free_high_class_uuar(uuari, uuarn); + +out: + mutex_unlock(&uuari->lock); +} + +static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: return MLX5_QP_STATE_RST; + case IB_QPS_INIT: return MLX5_QP_STATE_INIT; + case IB_QPS_RTR: return MLX5_QP_STATE_RTR; + case IB_QPS_RTS: return MLX5_QP_STATE_RTS; + case IB_QPS_SQD: return MLX5_QP_STATE_SQD; + case IB_QPS_SQE: return MLX5_QP_STATE_SQER; + case IB_QPS_ERR: return MLX5_QP_STATE_ERR; + default: return -1; + } +} + +static int to_mlx5_st(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_RC: return MLX5_QP_ST_RC; + case IB_QPT_UC: return MLX5_QP_ST_UC; + case IB_QPT_UD: return MLX5_QP_ST_UD; + case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; + case IB_QPT_SMI: return MLX5_QP_ST_QP0; + case IB_QPT_GSI: return MLX5_QP_ST_QP1; + case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; + case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: return -EINVAL; + } +} + +static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) +{ + return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; +} + +static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct mlx5_create_qp_mbox_in **in, + struct mlx5_ib_create_qp_resp *resp, int *inlen) +{ + struct mlx5_ib_ucontext *context; + struct mlx5_ib_create_qp ucmd; + int page_shift; + int uar_index; + int npages; + u32 offset; + int uuarn; + int ncont; + int err; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + return err; + } + + context = to_mucontext(pd->uobject->context); + /* + * TBD: should come from the verbs when we have the API + */ + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "uuar allocation failed\n"); + return uuarn; + } + } + + uar_index = uuarn_to_uar_index(&context->uuari, uuarn); + mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + + err = set_user_buf_size(dev, qp, &ucmd); + if (err) + goto err_uuar; + + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(qp->umem); + goto err_uuar; + } + + mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", + ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); + + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_umem; + } + mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); + (*in)->ctx.log_pg_sz_remote_qpn = + cpu_to_be32((page_shift - PAGE_SHIFT) << 24); + (*in)->ctx.params2 = cpu_to_be32(offset << 6); + + (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + resp->uuar_index = uuarn; + qp->uuarn = uuarn; + + err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); + if (err) { + mlx5_ib_dbg(dev, "map failed\n"); + goto err_free; + } + + err = ib_copy_to_udata(udata, resp, sizeof(*resp)); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + goto err_unmap; + } + qp->create_type = MLX5_QP_USER; + + return 0; + +err_unmap: + mlx5_ib_db_unmap_user(context, &qp->db); + +err_free: + mlx5_vfree(*in); + +err_umem: + ib_umem_release(qp->umem); + +err_uuar: + free_uuar(&context->uuari, uuarn); + return err; +} + +static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_ucontext *context; + + context = to_mucontext(pd->uobject->context); + mlx5_ib_db_unmap_user(context, &qp->db); + ib_umem_release(qp->umem); + free_uuar(&context->uuari, qp->uuarn); +} + +static int create_kernel_qp(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_mbox_in **in, int *inlen) +{ + enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; + struct mlx5_uuar_info *uuari; + int uar_index; + int uuarn; + int err; + + uuari = &dev->mdev.priv.uuari; + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + + if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) + lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; + + uuarn = alloc_uuar(uuari, lc); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "\n"); + return -ENOMEM; + } + + qp->bf = &uuari->bfs[uuarn]; + uar_index = qp->bf->uar->index; + + err = calc_sq_size(dev, init_attr, qp); + if (err < 0) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_uuar; + } + + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); + + err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_uuar; + } + + qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_buf; + } + (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24); + /* Set "fast registration enabled" for all kernel QPs */ + (*in)->ctx.params1 |= cpu_to_be32(1 << 11); + (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + + mlx5_fill_page_array(&qp->buf, (*in)->pas); + + err = mlx5_db_alloc(&dev->mdev, &qp->db); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_free; + } + + qp->db.db[0] = 0; + qp->db.db[1] = 0; + + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); + qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); + qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); + qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); + + if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || + !qp->sq.w_list || !qp->sq.wqe_head) { + err = -ENOMEM; + goto err_wrid; + } + qp->create_type = MLX5_QP_KERNEL; + + return 0; + +err_wrid: + mlx5_db_free(&dev->mdev, &qp->db); + kfree(qp->sq.wqe_head); + kfree(qp->sq.w_list); + kfree(qp->sq.wrid); + kfree(qp->sq.wr_data); + kfree(qp->rq.wrid); + +err_free: + mlx5_vfree(*in); + +err_buf: + mlx5_buf_free(&dev->mdev, &qp->buf); + +err_uuar: + free_uuar(&dev->mdev.priv.uuari, uuarn); + return err; +} + +static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + mlx5_db_free(&dev->mdev, &qp->db); + kfree(qp->sq.wqe_head); + kfree(qp->sq.w_list); + kfree(qp->sq.wrid); + kfree(qp->sq.wr_data); + kfree(qp->rq.wrid); + mlx5_buf_free(&dev->mdev, &qp->buf); + free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn); +} + +static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) +{ + if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || + (attr->qp_type == IB_QPT_XRC_INI)) + return cpu_to_be32(MLX5_SRQ_RQ); + else if (!qp->has_rq) + return cpu_to_be32(MLX5_ZERO_LEN_RQ); + else + return cpu_to_be32(MLX5_NON_ZERO_RQ); +} + +static int is_connected(enum ib_qp_type qp_type) +{ + if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) + return 1; + + return 0; +} + +static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_resources *devr = &dev->devr; + struct mlx5_ib_create_qp_resp resp; + struct mlx5_create_qp_mbox_in *in; + struct mlx5_ib_create_qp ucmd; + int inlen = sizeof(*in); + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + + if (pd && pd->uobject) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); + qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + } else { + qp->wq_sig = !!wq_signature; + } + + qp->has_rq = qp_has_rq(init_attr); + err = set_rq_size(dev, &init_attr->cap, qp->has_rq, + qp, (pd && pd->uobject) ? &ucmd : NULL); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + if (pd) { + if (pd->uobject) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); + if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || + ucmd.rq_wqe_count != qp->rq.wqe_cnt) { + mlx5_ib_dbg(dev, "invalid rq params\n"); + return -EINVAL; + } + if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", + ucmd.sq_wqe_count, dev->mdev.caps.max_wqes); + return -EINVAL; + } + err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); + if (err) + mlx5_ib_dbg(dev, "err %d\n", err); + } else { + err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); + if (err) + mlx5_ib_dbg(dev, "err %d\n", err); + else + qp->pa_lkey = to_mpd(pd)->pa_lkey; + } + + if (err) + return err; + } else { + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + qp->create_type = MLX5_QP_EMPTY; + } + + if (is_sqp(init_attr->qp_type)) + qp->port = init_attr->port_num; + + in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 | + MLX5_QP_PM_MIGRATED << 11); + + if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) + in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn); + else + in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE); + + if (qp->wq_sig) + in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { + int rcqe_sz; + int scqe_sz; + + rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); + scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); + + if (rcqe_sz == 128) + in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE; + else + in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { + if (scqe_sz == 128) + in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE; + else + in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE; + } + } + + if (qp->rq.wqe_cnt) { + in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4); + in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3; + } + + in->ctx.rq_type_srqn = get_rx_type(qp, init_attr); + + if (qp->sq.wqe_cnt) + in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11); + else + in->ctx.sq_crq_size |= cpu_to_be16(0x8000); + + /* Set default resources */ + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn); + break; + case IB_QPT_XRC_INI: + in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + break; + default: + if (init_attr->srq) { + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); + } else { + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + } + } + + if (init_attr->send_cq) + in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn); + + if (init_attr->recv_cq) + in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn); + + in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); + + err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen); + if (err) { + mlx5_ib_dbg(dev, "create qp failed\n"); + goto err_create; + } + + mlx5_vfree(in); + /* Hardware wants QPN written in big-endian order (after + * shifting) for send doorbell. Precompute this value to save + * a little bit when posting sends. + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + + qp->mqp.event = mlx5_ib_qp_event; + + return 0; + +err_create: + if (qp->create_type == MLX5_QP_USER) + destroy_qp_user(pd, qp); + else if (qp->create_type == MLX5_QP_KERNEL) + destroy_qp_kernel(dev, qp); + + mlx5_vfree(in); + return err; +} + +static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, + SINGLE_DEPTH_NESTING); + } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, + SINGLE_DEPTH_NESTING); + } + } else { + spin_lock_irq(&send_cq->lock); + } + } else if (recv_cq) { + spin_lock_irq(&recv_cq->lock); + } +} + +static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } + } else { + spin_unlock_irq(&send_cq->lock); + } + } else if (recv_cq) { + spin_unlock_irq(&recv_cq->lock); + } +} + +static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) +{ + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx5_ib_qp *qp, + struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = NULL; + *recv_cq = NULL; + break; + case MLX5_IB_QPT_REG_UMR: + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = NULL; + break; + + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: + *send_cq = NULL; + *recv_cq = NULL; + break; + } +} + +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_modify_qp_mbox_in *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return; + if (qp->state != IB_QPS_RESET) + if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state), + MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) + mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", + qp->mqp.qpn); + + get_cqs(qp, &send_cq, &recv_cq); + + if (qp->create_type == MLX5_QP_KERNEL) { + mlx5_ib_lock_cqs(send_cq, recv_cq); + __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + } + + err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp); + if (err) + mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn); + kfree(in); + + + if (qp->create_type == MLX5_QP_KERNEL) + destroy_qp_kernel(dev, qp); + else if (qp->create_type == MLX5_QP_USER) + destroy_qp_user(&get_pd(qp)->ibpd, qp); +} + +static const char *ib_qp_type_str(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_SMI: + return "IB_QPT_SMI"; + case IB_QPT_GSI: + return "IB_QPT_GSI"; + case IB_QPT_RC: + return "IB_QPT_RC"; + case IB_QPT_UC: + return "IB_QPT_UC"; + case IB_QPT_UD: + return "IB_QPT_UD"; + case IB_QPT_RAW_IPV6: + return "IB_QPT_RAW_IPV6"; + case IB_QPT_RAW_ETHERTYPE: + return "IB_QPT_RAW_ETHERTYPE"; + case IB_QPT_XRC_INI: + return "IB_QPT_XRC_INI"; + case IB_QPT_XRC_TGT: + return "IB_QPT_XRC_TGT"; + case IB_QPT_RAW_PACKET: + return "IB_QPT_RAW_PACKET"; + case MLX5_IB_QPT_REG_UMR: + return "MLX5_IB_QPT_REG_UMR"; + case IB_QPT_MAX: + default: + return "Invalid QP type"; + } +} + +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev; + struct mlx5_ib_qp *qp; + u16 xrcdn = 0; + int err; + + if (pd) { + dev = to_mdev(pd->device); + } else { + /* being cautious here */ + if (init_attr->qp_type != IB_QPT_XRC_TGT && + init_attr->qp_type != MLX5_IB_QPT_REG_UMR) { + pr_warn("%s: no PD for transport %s\n", __func__, + ib_qp_type_str(init_attr->qp_type)); + return ERR_PTR(-EINVAL); + } + dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); + } + + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + case IB_QPT_XRC_INI: + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { + mlx5_ib_dbg(dev, "XRC not supported\n"); + return ERR_PTR(-ENOSYS); + } + init_attr->recv_cq = NULL; + if (init_attr->qp_type == IB_QPT_XRC_TGT) { + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = NULL; + } + + /* fall through */ + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + case MLX5_IB_QPT_REG_UMR: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + err = create_qp_common(dev, pd, init_attr, udata, qp); + if (err) { + mlx5_ib_dbg(dev, "create_qp_common failed\n"); + kfree(qp); + return ERR_PTR(err); + } + + if (is_qp0(init_attr->qp_type)) + qp->ibqp.qp_num = 0; + else if (is_qp1(init_attr->qp_type)) + qp->ibqp.qp_num = 1; + else + qp->ibqp.qp_num = qp->mqp.qpn; + + mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", + qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn, + to_mcq(init_attr->send_cq)->mcq.cqn); + + qp->xrcdn = xrcdn; + + break; + + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: + mlx5_ib_dbg(dev, "unsupported qp type %d\n", + init_attr->qp_type); + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + return &qp->ibqp; +} + +int mlx5_ib_destroy_qp(struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + + destroy_qp_common(dev, mqp); + + kfree(mqp); + + return 0; +} + +static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, + int attr_mask) +{ + u32 hw_access_flags = 0; + u8 dest_rd_atomic; + u32 access_flags; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MLX5_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MLX5_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +enum { + MLX5_PATH_FLAG_FL = 1 << 0, + MLX5_PATH_FLAG_FREE_AR = 1 << 1, + MLX5_PATH_FLAG_COUNTER = 1 << 2, +}; + +static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +{ + if (rate == IB_RATE_PORT_CURRENT) { + return 0; + } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { + return -EINVAL; + } else { + while (rate != IB_RATE_2_5_GBPS && + !(1 << (rate + MLX5_STAT_RATE_OFFSET) & + dev->mdev.caps.stat_rate_support)) + --rate; + } + + return rate + MLX5_STAT_RATE_OFFSET; +} + +static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, + struct mlx5_qp_path *path, u8 port, int attr_mask, + u32 path_flags, const struct ib_qp_attr *attr) +{ + int err; + + path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; + + if (attr_mask & IB_QP_PKEY_INDEX) + path->pkey_index = attr->pkey_index; + + path->grh_mlid = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + + if (ah->ah_flags & IB_AH_GRH) { + path->grh_mlid |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + err = ib_rate_to_mlx5(dev, ah->static_rate); + if (err < 0) + return err; + path->static_rate = err; + path->port = port; + + if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) { + pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len); + return -EINVAL; + } + + path->grh_mlid |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + if (attr_mask & IB_QP_TIMEOUT) + path->ackto_lt = attr->timeout << 3; + + path->sl = ah->sl & 0xf; + + return 0; +} + +static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = { + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY | + MLX5_QP_OPTPAR_PRI_PORT, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY, + }, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + }, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | + MLX5_QP_OPTPAR_SRQN | + MLX5_QP_OPTPAR_CQN_RCV, + }, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, + }, + }, +}; + +static int ib_nr_to_mlx5_nr(int ib_mask) +{ + switch (ib_mask) { + case IB_QP_STATE: + return 0; + case IB_QP_CUR_STATE: + return 0; + case IB_QP_EN_SQD_ASYNC_NOTIFY: + return 0; + case IB_QP_ACCESS_FLAGS: + return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE; + case IB_QP_PKEY_INDEX: + return MLX5_QP_OPTPAR_PKEY_INDEX; + case IB_QP_PORT: + return MLX5_QP_OPTPAR_PRI_PORT; + case IB_QP_QKEY: + return MLX5_QP_OPTPAR_Q_KEY; + case IB_QP_AV: + return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH | + MLX5_QP_OPTPAR_PRI_PORT; + case IB_QP_PATH_MTU: + return 0; + case IB_QP_TIMEOUT: + return MLX5_QP_OPTPAR_ACK_TIMEOUT; + case IB_QP_RETRY_CNT: + return MLX5_QP_OPTPAR_RETRY_COUNT; + case IB_QP_RNR_RETRY: + return MLX5_QP_OPTPAR_RNR_RETRY; + case IB_QP_RQ_PSN: + return 0; + case IB_QP_MAX_QP_RD_ATOMIC: + return MLX5_QP_OPTPAR_SRA_MAX; + case IB_QP_ALT_PATH: + return MLX5_QP_OPTPAR_ALT_ADDR_PATH; + case IB_QP_MIN_RNR_TIMER: + return MLX5_QP_OPTPAR_RNR_TIMEOUT; + case IB_QP_SQ_PSN: + return 0; + case IB_QP_MAX_DEST_RD_ATOMIC: + return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE; + case IB_QP_PATH_MIG_STATE: + return MLX5_QP_OPTPAR_PM_STATE; + case IB_QP_CAP: + return 0; + case IB_QP_DEST_QPN: + return 0; + } + return 0; +} + +static int ib_mask_to_mlx5_opt(int ib_mask) +{ + int result = 0; + int i; + + for (i = 0; i < 8 * sizeof(int); i++) { + if ((1 << i) & ib_mask) + result |= ib_nr_to_mlx5_nr(1 << i); + } + + return result; +} + +static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_qp_context *context; + struct mlx5_modify_qp_mbox_in *in; + struct mlx5_ib_pd *pd; + enum mlx5_qp_state mlx5_cur, mlx5_new; + enum mlx5_qp_optpar optpar; + int sqd_event; + int mlx5_st; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + context = &in->ctx; + err = to_mlx5_st(ibqp->qp_type); + if (err < 0) + goto out; + + context->flags = cpu_to_be32(err << 16); + + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { + context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + } else { + switch (attr->path_mig_state) { + case IB_MIG_MIGRATED: + context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + break; + case IB_MIG_REARM: + context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11); + break; + case IB_MIG_ARMED: + context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11); + break; + } + } + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + context->mtu_msgmax = (IB_MTU_256 << 5) | 8; + } else if (ibqp->qp_type == IB_QPT_UD || + ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + } else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || + attr->path_mtu > IB_MTU_4096) { + mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu); + err = -EINVAL; + goto out; + } + context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg; + } + + if (attr_mask & IB_QP_DEST_QPN) + context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); + + if (attr_mask & IB_QP_PKEY_INDEX) + context->pri_path.pkey_index = attr->pkey_index; + + /* todo implement counter_index functionality */ + + if (is_sqp(ibqp->qp_type)) + context->pri_path.port = qp->port; + + if (attr_mask & IB_QP_PORT) + context->pri_path.port = attr->port_num; + + if (attr_mask & IB_QP_AV) { + err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path, + attr_mask & IB_QP_PORT ? attr->port_num : qp->port, + attr_mask, 0, attr); + if (err) + goto out; + } + + if (attr_mask & IB_QP_TIMEOUT) + context->pri_path.ackto_lt |= attr->timeout << 3; + + if (attr_mask & IB_QP_ALT_PATH) { + err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + attr->alt_port_num, attr_mask, 0, attr); + if (err) + goto out; + } + + pd = get_pd(qp); + get_cqs(qp, &send_cq, &recv_cq); + + context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); + context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; + context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0; + context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28); + + if (attr_mask & IB_QP_RNR_RETRY) + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + + if (attr_mask & IB_QP_RETRY_CNT) + context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) + context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + } + + if (attr_mask & IB_QP_SQ_PSN) + context->next_send_psn = cpu_to_be32(attr->sq_psn); + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic) + context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) + context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + + if (attr_mask & IB_QP_RQ_PSN) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + if (attr_mask & IB_QP_QKEY) + context->qkey = cpu_to_be32(attr->qkey); + + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->db_rec_addr = cpu_to_be64(qp->db.dma); + + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) + sqd_event = 1; + else + sqd_event = 0; + + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->sq_crq_size |= cpu_to_be16(1 << 4); + + + mlx5_cur = to_mlx5_state(cur_state); + mlx5_new = to_mlx5_state(new_state); + mlx5_st = to_mlx5_st(ibqp->qp_type); + if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) + goto out; + + optpar = ib_mask_to_mlx5_opt(attr_mask); + optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; + in->optparam = cpu_to_be32(optpar); + err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state), + to_mlx5_state(new_state), in, sqd_event, + &qp->mqp); + if (err) + goto out; + + qp->state = new_state; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_port = attr->alt_port_num; + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) + mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq.cur_post = 0; + qp->sq.last_poll = 0; + qp->db.db[MLX5_RCV_DBR] = 0; + qp->db.db[MLX5_SND_DBR] = 0; + } + +out: + kfree(in); + return err; +} + +int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + int port; + + mutex_lock(&qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && + !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + goto out; + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports)) + goto out; + + if (attr_mask & IB_QP_PKEY_INDEX) { + port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len) + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp) + goto out; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp) + goto out; + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) +{ + struct mlx5_ib_cq *cq; + unsigned cur; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } +} + +static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + +static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + +static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __be16 get_klm_octo(int npages) +{ + return cpu_to_be16(ALIGN(npages, 8) / 2); +} + +static __be64 frwr_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, int li) +{ + memset(umr, 0, sizeof(*umr)); + + if (li) { + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = 1 << 7; + return; + } + + umr->flags = (1 << 5); /* fail if not free */ + umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); + umr->mkey_mask = frwr_mkey_mask(); +} + +static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr) +{ + struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg; + u64 mask; + + memset(umr, 0, sizeof(*umr)); + + if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) { + umr->flags = 1 << 5; /* fail if not free */ + umr->klm_octowords = get_klm_octo(umrwr->npages); + mask = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_FREE; + umr->mkey_mask = cpu_to_be64(mask); + } else { + umr->flags = 2 << 5; /* fail if free */ + mask = MLX5_MKEY_MASK_FREE; + umr->mkey_mask = cpu_to_be64(mask); + } + + if (!wr->num_sge) + umr->flags |= (1 << 7); /* inline */ +} + +static u8 get_umr_flags(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; +} + +static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, + int li, int *writ) +{ + memset(seg, 0, sizeof(*seg)); + if (li) { + seg->status = 1 << 6; + return; + } + + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); + *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); + seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + seg->len = cpu_to_be64(wr->wr.fast_reg.length); + seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); + seg->log2_page_size = wr->wr.fast_reg.page_shift; +} + +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) +{ + memset(seg, 0, sizeof(*seg)); + if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { + seg->status = 1 << 6; + return; + } + + seg->flags = convert_access(wr->wr.fast_reg.access_flags); + seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn); + seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + seg->len = cpu_to_be64(wr->wr.fast_reg.length); + seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); +} + +static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, + struct ib_send_wr *wr, + struct mlx5_core_dev *mdev, + struct mlx5_ib_pd *pd, + int writ) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + u64 *page_list = wr->wr.fast_reg.page_list->page_list; + u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) + mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); + dseg->addr = cpu_to_be64(mfrpl->map); + dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); + dseg->lkey = cpu_to_be32(pd->pa_lkey); +} + +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +static u8 calc_sig(void *wqe, int size) +{ + u8 *p = wqe; + u8 res = 0; + int i; + + for (i = 0; i < size; i++) + res ^= p[i]; + + return ~res; +} + +static u8 wq_sig(void *wqe) +{ + return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); +} + +static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, + void *wqe, int *sz) +{ + struct mlx5_wqe_inline_seg *seg; + void *qend = qp->sq.qend; + void *addr; + int inl = 0; + int copy; + int len; + int i; + + seg = wqe; + wqe += sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { + addr = (void *)(unsigned long)(wr->sg_list[i].addr); + len = wr->sg_list[i].length; + inl += len; + + if (unlikely(inl > qp->max_inline_data)) + return -ENOMEM; + + if (unlikely(wqe + len > qend)) { + copy = qend - wqe; + memcpy(wqe, addr, copy); + addr += copy; + len -= copy; + wqe = mlx5_get_send_wqe(qp, 0); + } + memcpy(wqe, addr, len); + wqe += len; + } + + seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); + + *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + + return 0; +} + +static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, + struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) +{ + int writ = 0; + int li; + + li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0; + if (unlikely(wr->send_flags & IB_SEND_INLINE)) + return -EINVAL; + + set_frwr_umr_segment(*seg, wr, li); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + set_mkey_segment(*seg, wr, li, &writ); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + if (!li) { + set_frwr_pages(*seg, wr, mdev, pd, writ); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } + return 0; +} + +static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +{ + __be32 *p = NULL; + int tidx = idx; + int i, j; + + pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { + if ((i & 0xf) == 0) { + void *buf = mlx5_get_send_wqe(qp, tidx); + tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); + p = buf; + j = 0; + } + pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), + be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), + be32_to_cpu(p[j + 3])); + } +} + +static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, + unsigned bytecnt, struct mlx5_ib_qp *qp) +{ + while (bytecnt > 0) { + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + bytecnt -= 64; + if (unlikely(src == qp->sq.qend)) + src = mlx5_get_send_wqe(qp, 0); + } +} + +static u8 get_fence(u8 fence, struct ib_send_wr *wr) +{ + if (unlikely(wr->opcode == IB_WR_LOCAL_INV && + wr->send_flags & IB_SEND_FENCE)) + return MLX5_FENCE_MODE_STRONG_ORDERING; + + if (unlikely(fence)) { + if (wr->send_flags & IB_SEND_FENCE) + return MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + return fence; + + } else { + return 0; + } +} + +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *dpseg; + struct mlx5_wqe_xrc_seg *xrc; + struct mlx5_bf *bf = qp->bf; + int uninitialized_var(size); + void *qend = qp->sq.qend; + unsigned long flags; + u32 mlx5_opcode; + unsigned idx; + int err = 0; + int inl = 0; + int num_sge; + void *seg; + int nreq; + int i; + u8 next_fence = 0; + u8 opmod = 0; + u8 fence; + + spin_lock_irqsave(&qp->sq.lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + seg = mlx5_get_send_wqe(qp, idx); + ctrl = seg; + *(uint32_t *)(seg + 8) = 0; + ctrl->imm = send_ieth(wr); + ctrl->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + MLX5_WQE_CTRL_SOLICITED : 0); + + seg += sizeof(*ctrl); + size = sizeof(*ctrl) / 16; + + switch (ibqp->qp_type) { + case IB_QPT_XRC_INI: + xrc = seg; + xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num); + seg += sizeof(*xrc); + size += sizeof(*xrc) / 16; + /* fall through */ + case IB_QPT_RC: + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(seg, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_raddr_seg) / 16; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + set_raddr_seg(seg, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + + set_atomic_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_atomic_seg); + + size += (sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_atomic_seg)) / 16; + break; + + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + set_raddr_seg(seg, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + + set_masked_atomic_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_masked_atomic_seg); + + size += (sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16; + break; + + case IB_WR_LOCAL_INV: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; + ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); + err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + + case IB_WR_FAST_REG_MR: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; + ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + + default: + break; + } + break; + + case IB_QPT_UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(seg, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_raddr_seg) / 16; + break; + + default: + break; + } + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + set_datagram_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_datagram_seg); + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + break; + + case MLX5_IB_QPT_REG_UMR: + if (wr->opcode != MLX5_IB_WR_UMR) { + err = -EINVAL; + mlx5_ib_warn(dev, "bad opcode\n"); + goto out; + } + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + set_reg_umr_segment(seg, wr); + seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + set_reg_mkey_segment(seg, wr); + seg += sizeof(struct mlx5_mkey_seg); + size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + break; + + default: + break; + } + + if (wr->send_flags & IB_SEND_INLINE && num_sge) { + int uninitialized_var(sz); + + err = set_data_inl_seg(qp, wr, seg, &sz); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + inl = 1; + size += sz; + } else { + dpseg = seg; + for (i = 0; i < num_sge; i++) { + if (unlikely(dpseg == qend)) { + seg = mlx5_get_send_wqe(qp, 0); + dpseg = seg; + } + if (likely(wr->sg_list[i].length)) { + set_data_ptr_seg(dpseg, wr->sg_list + i); + size += sizeof(struct mlx5_wqe_data_seg) / 16; + dpseg++; + } + } + } + + mlx5_opcode = mlx5_ib_opcode[wr->opcode]; + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | + ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= get_fence(fence, wr); + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr->wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + + if (0) + dump_wqe(qp, idx, size); + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + if (bf->need_lock) + spin_lock(&bf->lock); + + /* TBD enable WC */ + if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { + mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp); + /* wc_wmb(); */ + } else { + mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset, + MLX5_GET_DOORBELL_LOCK(&bf->lock32)); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + mmiowb(); + } + bf->offset ^= bf->buf_size; + if (bf->need_lock) + spin_unlock(&bf->lock); + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) +{ + sig->signature = calc_sig(sig, size); +} + +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *scat; + struct mlx5_rwqe_sig *sig; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&qp->rq.lock, flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = get_recv_wqe(qp, ind); + if (qp->wq_sig) + scat++; + + for (i = 0; i < wr->num_sge; i++) + set_data_ptr_seg(scat + i, wr->sg_list + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + + if (qp->wq_sig) { + sig = (struct mlx5_rwqe_sig *)scat; + set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} + +static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) +{ + switch (mlx5_state) { + case MLX5_QP_STATE_RST: return IB_QPS_RESET; + case MLX5_QP_STATE_INIT: return IB_QPS_INIT; + case MLX5_QP_STATE_RTR: return IB_QPS_RTR; + case MLX5_QP_STATE_RTS: return IB_QPS_RTS; + case MLX5_QP_STATE_SQ_DRAINING: + case MLX5_QP_STATE_SQD: return IB_QPS_SQD; + case MLX5_QP_STATE_SQER: return IB_QPS_SQE; + case MLX5_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state) +{ + switch (mlx5_mig_state) { + case MLX5_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX5_QP_PM_REARM: return IB_MIG_REARM; + case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx5_flags) +{ + int ib_flags = 0; + + if (mlx5_flags & MLX5_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx5_flags & MLX5_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx5_flags & MLX5_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, + struct mlx5_qp_path *path) +{ + struct mlx5_core_dev *dev = &ibdev->mdev; + + memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); + ib_ah_attr->port_num = path->port; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + ib_ah_attr->sl = path->sl & 0xf; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof(ib_ah_attr->grh.dgid.raw)); + } +} + +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_query_qp_mbox_out *outb; + struct mlx5_qp_context *context; + int mlx5_state; + int err = 0; + + mutex_lock(&qp->mutex); + outb = kzalloc(sizeof(*outb), GFP_KERNEL); + if (!outb) { + err = -ENOMEM; + goto out; + } + context = &outb->ctx; + err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb)); + if (err) + goto out_free; + + mlx5_state = be32_to_cpu(context->flags) >> 28; + + qp->state = to_ib_qp_state(mlx5_state); + qp_attr->qp_state = qp->state; + qp_attr->path_mtu = context->mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context->qkey); + qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context->params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->port_num = context->pri_path.port; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context->pri_path.ackto_lt >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; + qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + /* We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + + qp_init_attr->create_flags = 0; + if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? + IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + +out_free: + kfree(outb); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_xrcd *xrcd; + int err; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn); + if (err) { + kfree(xrcd); + return ERR_PTR(-ENOMEM); + } + + return &xrcd->ibxrcd; +} + +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct mlx5_ib_dev *dev = to_mdev(xrcd->device); + u32 xrcdn = to_mxrcd(xrcd)->xrcdn; + int err; + + err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn); + if (err) { + mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); + return err; + } + + kfree(xrcd); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c new file mode 100644 index 000000000000..84d297afd6a9 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -0,0 +1,473 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "mlx5_ib.h" +#include "user.h" + +/* not supported currently */ +static int srq_signature; + +static void *get_wqe(struct mlx5_ib_srq *srq, int n) +{ + return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); +} + +static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) +{ + struct ib_event event; + struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (type) { + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n", + type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct mlx5_create_srq_mbox_in **in, + struct ib_udata *udata, int buf_size, int *inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_create_srq ucmd; + int err; + int npages; + int page_shift; + int ncont; + u32 offset; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + mlx5_ib_dbg(dev, "failed copy udata\n"); + return -EFAULT; + } + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, + 0, 0); + if (IS_ERR(srq->umem)) { + mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); + err = PTR_ERR(srq->umem); + return err; + } + + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, + &page_shift, &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, + &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *in = mlx5_vzalloc(*inlen); + if (!(*in)) { + err = -ENOMEM; + goto err_umem; + } + + mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); + + err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), + ucmd.db_addr, &srq->db); + if (err) { + mlx5_ib_dbg(dev, "map doorbell failed\n"); + goto err_in; + } + + (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); + + return 0; + +err_in: + mlx5_vfree(*in); + +err_umem: + ib_umem_release(srq->umem); + + return err; +} + +static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, + struct mlx5_create_srq_mbox_in **in, int buf_size, + int *inlen) +{ + int err; + int i; + struct mlx5_wqe_srq_next_seg *next; + int page_shift; + int npages; + + err = mlx5_db_alloc(&dev->mdev, &srq->db); + if (err) { + mlx5_ib_warn(dev, "alloc dbell rec failed\n"); + return err; + } + + *srq->db.db = 0; + + if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + mlx5_ib_dbg(dev, "buf alloc failed\n"); + err = -ENOMEM; + goto err_db; + } + page_shift = srq->buf.page_shift; + + srq->head = 0; + srq->tail = srq->msrq.max - 1; + srq->wqe_ctr = 0; + + for (i = 0; i < srq->msrq.max; i++) { + next = get_wqe(srq, i); + next->next_wqe_index = + cpu_to_be16((i + 1) & (srq->msrq.max - 1)); + } + + npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); + mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", + buf_size, page_shift, srq->buf.npages, npages); + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_buf; + } + mlx5_fill_page_array(&srq->buf, (*in)->pas); + + srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + mlx5_ib_dbg(dev, "kmalloc failed %lu\n", + (unsigned long)(srq->msrq.max * sizeof(u64))); + err = -ENOMEM; + goto err_in; + } + srq->wq_sig = !!srq_signature; + + (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + + return 0; + +err_in: + mlx5_vfree(*in); + +err_buf: + mlx5_buf_free(&dev->mdev, &srq->buf); + +err_db: + mlx5_db_free(&dev->mdev, &srq->db); + return err; +} + +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +{ + mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + ib_umem_release(srq->umem); +} + + +static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) +{ + kfree(srq->wrid); + mlx5_buf_free(&dev->mdev, &srq->buf); + mlx5_db_free(&dev->mdev, &srq->db); +} + +struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_srq *srq; + int desc_size; + int buf_size; + int err; + struct mlx5_create_srq_mbox_in *uninitialized_var(in); + int uninitialized_var(inlen); + int is_xrc; + u32 flgs, xrcdn; + + /* Sanity check SRQ size before proceeding */ + if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) { + mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", + init_attr->attr.max_wr, + dev->mdev.caps.max_srq_wqes); + return ERR_PTR(-EINVAL); + } + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->msrq.max_gs = init_attr->attr.max_sge; + + desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); + desc_size = roundup_pow_of_two(desc_size); + desc_size = max_t(int, 32, desc_size); + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / + sizeof(struct mlx5_wqe_data_seg); + srq->msrq.wqe_shift = ilog2(desc_size); + buf_size = srq->msrq.max * desc_size; + mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", + desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, + srq->msrq.max_avail_gather); + + if (pd->uobject) + err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen); + else + err = create_srq_kernel(dev, srq, &in, buf_size, &inlen); + + if (err) { + mlx5_ib_warn(dev, "create srq %s failed, err %d\n", + pd->uobject ? "user" : "kernel", err); + goto err_srq; + } + + is_xrc = (init_attr->srq_type == IB_SRQT_XRC); + in->ctx.state_log_sz = ilog2(srq->msrq.max); + flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; + xrcdn = 0; + if (is_xrc) { + xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; + in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); + } else if (init_attr->srq_type == IB_SRQT_BASIC) { + xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; + in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); + } + + in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); + + in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); + in->ctx.db_record = cpu_to_be64(srq->db.dma); + err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen); + mlx5_vfree(in); + if (err) { + mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); + goto err_srq; + } + + mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn); + + srq->msrq.event = mlx5_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; + + if (pd->uobject) + if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + mlx5_ib_dbg(dev, "copy to user failed\n"); + err = -EFAULT; + goto err_core; + } + + init_attr->attr.max_wr = srq->msrq.max - 1; + + return &srq->ibsrq; + +err_core: + mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + if (pd->uobject) + destroy_srq_user(pd, srq); + else + destroy_srq_kernel(dev, srq); + +err_srq: + kfree(srq); + + return ERR_PTR(err); +} + +int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + int ret; + + /* We don't support resizing SRQs yet */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->msrq.max) + return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1); + mutex_unlock(&srq->mutex); + + if (ret) + return ret; + } + + return 0; +} + +int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + int ret; + struct mlx5_query_srq_mbox_out *out; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out); + if (ret) + goto out_box; + + srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + +out_box: + kfree(out); + return ret; +} + +int mlx5_ib_destroy_srq(struct ib_srq *srq) +{ + struct mlx5_ib_dev *dev = to_mdev(srq->device); + struct mlx5_ib_srq *msrq = to_msrq(srq); + + mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq); + + if (srq->uobject) { + mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + ib_umem_release(msrq->umem); + } else { + kfree(msrq->wrid); + mlx5_buf_free(&dev->mdev, &msrq->buf); + mlx5_db_free(&dev->mdev, &msrq->db); + } + + kfree(srq); + return 0; +} + +void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) +{ + struct mlx5_wqe_srq_next_seg *next; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + next = get_wqe(srq, srq->tail); + next->next_wqe_index = cpu_to_be16(wqe_index); + srq->tail = wqe_index; + + spin_unlock(&srq->lock); +} + +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + struct mlx5_wqe_srq_next_seg *next; + struct mlx5_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->num_sge > srq->msrq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + + srq->wrid[srq->head] = wr->wr_id; + + next = get_wqe(srq, srq->head); + srq->head = be16_to_cpu(next->next_wqe_index); + scat = (struct mlx5_wqe_data_seg *)(next + 1); + + for (i = 0; i < wr->num_sge; i++) { + scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); + scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey); + scat[i].addr = cpu_to_be64(wr->sg_list[i].addr); + } + + if (i < srq->msrq.max_avail_gather) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + } + + if (likely(nreq)) { + srq->wqe_ctr += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *srq->db.db = cpu_to_be32(srq->wqe_ctr); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h new file mode 100644 index 000000000000..a886de3e593c --- /dev/null +++ b/drivers/infiniband/hw/mlx5/user.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_USER_H +#define MLX5_IB_USER_H + +#include + +enum { + MLX5_QP_FLAG_SIGNATURE = 1 << 0, + MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, +}; + +enum { + MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, +}; + + +/* Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MLX5_IB_UVERBS_ABI_VERSION 1 + +/* Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mlx5_ib_alloc_ucontext_req { + __u32 total_num_uuars; + __u32 num_low_latency_uuars; +}; + +struct mlx5_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 bf_reg_size; + __u32 tot_uuars; + __u32 cache_line_size; + __u16 max_sq_desc_sz; + __u16 max_rq_desc_sz; + __u32 max_send_wqebb; + __u32 max_recv_wr; + __u32 max_srq_recv_wr; + __u16 num_ports; + __u16 reserved; +}; + +struct mlx5_ib_alloc_pd_resp { + __u32 pdn; +}; + +struct mlx5_ib_create_cq { + __u64 buf_addr; + __u64 db_addr; + __u32 cqe_size; +}; + +struct mlx5_ib_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mlx5_ib_resize_cq { + __u64 buf_addr; +}; + +struct mlx5_ib_create_srq { + __u64 buf_addr; + __u64 db_addr; + __u32 flags; +}; + +struct mlx5_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mlx5_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u32 sq_wqe_count; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 flags; +}; + +struct mlx5_ib_create_qp_resp { + __u32 uuar_index; +}; +#endif /* MLX5_IB_USER_H */ diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig index bcdbc14aeff0..8cf7563a8d92 100644 --- a/drivers/net/ethernet/mellanox/Kconfig +++ b/drivers/net/ethernet/mellanox/Kconfig @@ -19,5 +19,6 @@ config NET_VENDOR_MELLANOX if NET_VENDOR_MELLANOX source "drivers/net/ethernet/mellanox/mlx4/Kconfig" +source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig" endif # NET_VENDOR_MELLANOX diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile index 37afb9683372..38fe32ef5e5f 100644 --- a/drivers/net/ethernet/mellanox/Makefile +++ b/drivers/net/ethernet/mellanox/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_MLX4_CORE) += mlx4/ +obj-$(CONFIG_MLX5_CORE) += mlx5/core/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig new file mode 100644 index 000000000000..21962828925a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -0,0 +1,18 @@ +# +# Mellanox driver configuration +# + +config MLX5_CORE + tristate + depends on PCI && X86 + default n + +config MLX5_DEBUG + bool "Verbose debugging output" if (MLX5_CORE && EXPERT) + depends on MLX5_CORE + default y + ---help--- + This option causes debugging code to be compiled into the + mlx5_core driver. The output can be turned on via the + debug_mask module parameter (which can also be set after + the driver is loaded through sysfs). diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile new file mode 100644 index 000000000000..105780bb980b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_MLX5_CORE) += mlx5_core.o + +mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ + health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + mad.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c new file mode 100644 index 000000000000..b215742b842f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mlx5_core.h" + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ + +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, + struct mlx5_buf *buf) +{ + dma_addr_t t; + + buf->size = size; + if (size <= max_direct) { + buf->nbufs = 1; + buf->npages = 1; + buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + buf->direct.map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + } else { + int i; + + buf->direct.buf = NULL; + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; + buf->page_shift = PAGE_SHIFT; + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), + GFP_KERNEL); + if (!buf->page_list) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; i++) { + buf->page_list[i].buf = + dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + buf->page_list[i].map = t; + } + + if (BITS_PER_LONG == 64) { + struct page **pages; + pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL); + if (!pages) + goto err_free; + for (i = 0; i < buf->nbufs; i++) + pages[i] = virt_to_page(buf->page_list[i].buf); + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); + kfree(pages); + if (!buf->direct.buf) + goto err_free; + } + } + + return 0; + +err_free: + mlx5_buf_free(dev, buf); + + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx5_buf_alloc); + +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) +{ + int i; + + if (buf->nbufs == 1) + dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, + buf->direct.map); + else { + if (BITS_PER_LONG == 64 && buf->direct.buf) + vunmap(buf->direct.buf); + + for (i = 0; i < buf->nbufs; i++) + if (buf->page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); + } +} +EXPORT_SYMBOL_GPL(mlx5_buf_free); + +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device) +{ + struct mlx5_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); + if (!pgdir) + return NULL; + + bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE); + pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, + &pgdir->db_dma, GFP_KERNEL); + if (!pgdir->db_page) { + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, + struct mlx5_db *db) +{ + int offset; + int i; + + i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE); + if (i >= MLX5_DB_PER_PAGE) + return -ENOMEM; + + __clear_bit(i, pgdir->bitmap); + + db->u.pgdir = pgdir; + db->index = i; + offset = db->index * L1_CACHE_BYTES; + db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); + db->dma = pgdir->db_dma + offset; + + return 0; +} + +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + struct mlx5_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&dev->priv.pgdir_mutex); + + list_for_each_entry(pgdir, &dev->priv.pgdir_list, list) + if (!mlx5_alloc_db_from_pgdir(pgdir, db)) + goto out; + + pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev)); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &dev->priv.pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db)); + +out: + mutex_unlock(&dev->priv.pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_db_alloc); + +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + mutex_lock(&dev->priv.pgdir_mutex); + + __set_bit(db->index, db->u.pgdir->bitmap); + + if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) { + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + kfree(db->u.pgdir); + } + + mutex_unlock(&dev->priv.pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx5_db_free); + + +void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) +{ + u64 addr; + int i; + + for (i = 0; i < buf->npages; i++) { + if (buf->nbufs == 1) + addr = buf->direct.map + (i << buf->page_shift); + else + addr = buf->page_list[i].map; + + pas[i] = cpu_to_be64(addr); + } +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_array); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c new file mode 100644 index 000000000000..c1c0eef89694 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -0,0 +1,1515 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mlx5_core.h" + +enum { + CMD_IF_REV = 3, +}; + +enum { + CMD_MODE_POLLING, + CMD_MODE_EVENTS +}; + +enum { + NUM_LONG_LISTS = 2, + NUM_MED_LISTS = 64, + LONG_LIST_SIZE = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 + + MLX5_CMD_DATA_BLOCK_SIZE, + MED_LIST_SIZE = 16 + MLX5_CMD_DATA_BLOCK_SIZE, +}; + +enum { + MLX5_CMD_DELIVERY_STAT_OK = 0x0, + MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1, + MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2, + MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3, + MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4, + MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5, + MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6, + MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7, + MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8, + MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9, + MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, +}; + +enum { + MLX5_CMD_STAT_OK = 0x0, + MLX5_CMD_STAT_INT_ERR = 0x1, + MLX5_CMD_STAT_BAD_OP_ERR = 0x2, + MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, + MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, + MLX5_CMD_STAT_BAD_RES_ERR = 0x5, + MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_LIM_ERR = 0x8, + MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, + MLX5_CMD_STAT_IX_ERR = 0xa, + MLX5_CMD_STAT_NO_RES_ERR = 0xf, + MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, + MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, + MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, + MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, + MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, +}; + +static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, + struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, + mlx5_cmd_cbk_t cbk, + void *context, int page_queue) +{ + gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; + struct mlx5_cmd_work_ent *ent; + + ent = kzalloc(sizeof(*ent), alloc_flags); + if (!ent) + return ERR_PTR(-ENOMEM); + + ent->in = in; + ent->out = out; + ent->callback = cbk; + ent->context = context; + ent->cmd = cmd; + ent->page_queue = page_queue; + + return ent; +} + +static u8 alloc_token(struct mlx5_cmd *cmd) +{ + u8 token; + + spin_lock(&cmd->token_lock); + token = cmd->token++ % 255 + 1; + spin_unlock(&cmd->token_lock); + + return token; +} + +static int alloc_ent(struct mlx5_cmd *cmd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds); + if (ret < cmd->max_reg_cmds) + clear_bit(ret, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + + return ret < cmd->max_reg_cmds ? ret : -ENOMEM; +} + +static void free_ent(struct mlx5_cmd *cmd, int idx) +{ + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + set_bit(idx, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); +} + +static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) +{ + return cmd->cmd_buf + (idx << cmd->log_stride); +} + +static u8 xor8_buf(void *buf, int len) +{ + u8 *ptr = buf; + u8 sum = 0; + int i; + + for (i = 0; i < len; i++) + sum ^= ptr[i]; + + return sum; +} + +static int verify_block_sig(struct mlx5_cmd_prot_block *block) +{ + if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) + return -EINVAL; + + if (xor8_buf(block, sizeof(*block)) != 0xff) + return -EINVAL; + + return 0; +} + +static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token) +{ + block->token = token; + block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2); + block->sig = ~xor8_buf(block, sizeof(*block) - 1); +} + +static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token) +{ + struct mlx5_cmd_mailbox *next = msg->next; + + while (next) { + calc_block_sig(next->buf, token); + next = next->next; + } +} + +static void set_signature(struct mlx5_cmd_work_ent *ent) +{ + ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); + calc_chain_sig(ent->in, ent->token); + calc_chain_sig(ent->out, ent->token); +} + +static void poll_timeout(struct mlx5_cmd_work_ent *ent) +{ + unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000); + u8 own; + + do { + own = ent->lay->status_own; + if (!(own & CMD_OWNER_HW)) { + ent->ret = 0; + return; + } + usleep_range(5000, 10000); + } while (time_before(jiffies, poll_end)); + + ent->ret = -ETIMEDOUT; +} + +static void free_cmd(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + + +static int verify_signature(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd_mailbox *next = ent->out->next; + int err; + u8 sig; + + sig = xor8_buf(ent->lay, sizeof(*ent->lay)); + if (sig != 0xff) + return -EINVAL; + + while (next) { + err = verify_block_sig(next->buf); + if (err) + return err; + + next = next->next; + } + + return 0; +} + +static void dump_buf(void *buf, int size, int data_only, int offset) +{ + __be32 *p = buf; + int i; + + for (i = 0; i < size; i += 16) { + pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]), + be32_to_cpu(p[1]), be32_to_cpu(p[2]), + be32_to_cpu(p[3])); + p += 4; + offset += 16; + } + if (!data_only) + pr_debug("\n"); +} + +const char *mlx5_command_str(int command) +{ + switch (command) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + return "QUERY_HCA_CAP"; + + case MLX5_CMD_OP_SET_HCA_CAP: + return "SET_HCA_CAP"; + + case MLX5_CMD_OP_QUERY_ADAPTER: + return "QUERY_ADAPTER"; + + case MLX5_CMD_OP_INIT_HCA: + return "INIT_HCA"; + + case MLX5_CMD_OP_TEARDOWN_HCA: + return "TEARDOWN_HCA"; + + case MLX5_CMD_OP_QUERY_PAGES: + return "QUERY_PAGES"; + + case MLX5_CMD_OP_MANAGE_PAGES: + return "MANAGE_PAGES"; + + case MLX5_CMD_OP_CREATE_MKEY: + return "CREATE_MKEY"; + + case MLX5_CMD_OP_QUERY_MKEY: + return "QUERY_MKEY"; + + case MLX5_CMD_OP_DESTROY_MKEY: + return "DESTROY_MKEY"; + + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + return "QUERY_SPECIAL_CONTEXTS"; + + case MLX5_CMD_OP_CREATE_EQ: + return "CREATE_EQ"; + + case MLX5_CMD_OP_DESTROY_EQ: + return "DESTROY_EQ"; + + case MLX5_CMD_OP_QUERY_EQ: + return "QUERY_EQ"; + + case MLX5_CMD_OP_CREATE_CQ: + return "CREATE_CQ"; + + case MLX5_CMD_OP_DESTROY_CQ: + return "DESTROY_CQ"; + + case MLX5_CMD_OP_QUERY_CQ: + return "QUERY_CQ"; + + case MLX5_CMD_OP_MODIFY_CQ: + return "MODIFY_CQ"; + + case MLX5_CMD_OP_CREATE_QP: + return "CREATE_QP"; + + case MLX5_CMD_OP_DESTROY_QP: + return "DESTROY_QP"; + + case MLX5_CMD_OP_RST2INIT_QP: + return "RST2INIT_QP"; + + case MLX5_CMD_OP_INIT2RTR_QP: + return "INIT2RTR_QP"; + + case MLX5_CMD_OP_RTR2RTS_QP: + return "RTR2RTS_QP"; + + case MLX5_CMD_OP_RTS2RTS_QP: + return "RTS2RTS_QP"; + + case MLX5_CMD_OP_SQERR2RTS_QP: + return "SQERR2RTS_QP"; + + case MLX5_CMD_OP_2ERR_QP: + return "2ERR_QP"; + + case MLX5_CMD_OP_RTS2SQD_QP: + return "RTS2SQD_QP"; + + case MLX5_CMD_OP_SQD2RTS_QP: + return "SQD2RTS_QP"; + + case MLX5_CMD_OP_2RST_QP: + return "2RST_QP"; + + case MLX5_CMD_OP_QUERY_QP: + return "QUERY_QP"; + + case MLX5_CMD_OP_CONF_SQP: + return "CONF_SQP"; + + case MLX5_CMD_OP_MAD_IFC: + return "MAD_IFC"; + + case MLX5_CMD_OP_INIT2INIT_QP: + return "INIT2INIT_QP"; + + case MLX5_CMD_OP_SUSPEND_QP: + return "SUSPEND_QP"; + + case MLX5_CMD_OP_UNSUSPEND_QP: + return "UNSUSPEND_QP"; + + case MLX5_CMD_OP_SQD2SQD_QP: + return "SQD2SQD_QP"; + + case MLX5_CMD_OP_ALLOC_QP_COUNTER_SET: + return "ALLOC_QP_COUNTER_SET"; + + case MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET: + return "DEALLOC_QP_COUNTER_SET"; + + case MLX5_CMD_OP_QUERY_QP_COUNTER_SET: + return "QUERY_QP_COUNTER_SET"; + + case MLX5_CMD_OP_CREATE_PSV: + return "CREATE_PSV"; + + case MLX5_CMD_OP_DESTROY_PSV: + return "DESTROY_PSV"; + + case MLX5_CMD_OP_QUERY_PSV: + return "QUERY_PSV"; + + case MLX5_CMD_OP_QUERY_SIG_RULE_TABLE: + return "QUERY_SIG_RULE_TABLE"; + + case MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE: + return "QUERY_BLOCK_SIZE_TABLE"; + + case MLX5_CMD_OP_CREATE_SRQ: + return "CREATE_SRQ"; + + case MLX5_CMD_OP_DESTROY_SRQ: + return "DESTROY_SRQ"; + + case MLX5_CMD_OP_QUERY_SRQ: + return "QUERY_SRQ"; + + case MLX5_CMD_OP_ARM_RQ: + return "ARM_RQ"; + + case MLX5_CMD_OP_RESIZE_SRQ: + return "RESIZE_SRQ"; + + case MLX5_CMD_OP_ALLOC_PD: + return "ALLOC_PD"; + + case MLX5_CMD_OP_DEALLOC_PD: + return "DEALLOC_PD"; + + case MLX5_CMD_OP_ALLOC_UAR: + return "ALLOC_UAR"; + + case MLX5_CMD_OP_DEALLOC_UAR: + return "DEALLOC_UAR"; + + case MLX5_CMD_OP_ATTACH_TO_MCG: + return "ATTACH_TO_MCG"; + + case MLX5_CMD_OP_DETACH_FROM_MCG: + return "DETACH_FROM_MCG"; + + case MLX5_CMD_OP_ALLOC_XRCD: + return "ALLOC_XRCD"; + + case MLX5_CMD_OP_DEALLOC_XRCD: + return "DEALLOC_XRCD"; + + case MLX5_CMD_OP_ACCESS_REG: + return "MLX5_CMD_OP_ACCESS_REG"; + + default: return "unknown command opcode"; + } +} + +static void dump_command(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent, int input) +{ + u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode); + struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + struct mlx5_cmd_mailbox *next = msg->next; + int data_only; + int offset = 0; + int dump_len; + + data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); + + if (data_only) + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, + "dump command data %s(0x%x) %s\n", + mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + else + mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n", + mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + + if (data_only) { + if (input) { + dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset); + offset += sizeof(ent->lay->in); + } else { + dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset); + offset += sizeof(ent->lay->out); + } + } else { + dump_buf(ent->lay, sizeof(*ent->lay), 0, offset); + offset += sizeof(*ent->lay); + } + + while (next && offset < msg->len) { + if (data_only) { + dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset); + dump_buf(next->buf, dump_len, 1, offset); + offset += MLX5_CMD_DATA_BLOCK_SIZE; + } else { + mlx5_core_dbg(dev, "command block:\n"); + dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset); + offset += sizeof(struct mlx5_cmd_prot_block); + } + next = next->next; + } + + if (data_only) + pr_debug("\n"); +} + +static void cmd_work_handler(struct work_struct *work) +{ + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); + struct mlx5_cmd *cmd = ent->cmd; + struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + struct mlx5_cmd_layout *lay; + struct semaphore *sem; + + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; + down(sem); + if (!ent->page_queue) { + ent->idx = alloc_ent(cmd); + if (ent->idx < 0) { + mlx5_core_err(dev, "failed to allocate command entry\n"); + up(sem); + return; + } + } else { + ent->idx = cmd->max_reg_cmds; + } + + ent->token = alloc_token(cmd); + cmd->ent_arr[ent->idx] = ent; + lay = get_inst(cmd, ent->idx); + ent->lay = lay; + memset(lay, 0, sizeof(*lay)); + memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); + if (ent->in->next) + lay->in_ptr = cpu_to_be64(ent->in->next->dma); + lay->inlen = cpu_to_be32(ent->in->len); + if (ent->out->next) + lay->out_ptr = cpu_to_be64(ent->out->next->dma); + lay->outlen = cpu_to_be32(ent->out->len); + lay->type = MLX5_PCI_CMD_XPORT; + lay->token = ent->token; + lay->status_own = CMD_OWNER_HW; + if (!cmd->checksum_disabled) + set_signature(ent); + dump_command(dev, ent, 1); + ktime_get_ts(&ent->ts1); + + /* ring doorbell after the descriptor is valid */ + wmb(); + iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); + mlx5_core_dbg(dev, "write 0x%x to command doorbell\n", 1 << ent->idx); + mmiowb(); + if (cmd->mode == CMD_MODE_POLLING) { + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + } +} + +static const char *deliv_status_to_str(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + return "no errors"; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + return "signature error"; + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return "token error"; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + return "bad block number"; + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + return "output pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return "input pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return "firmware internal error"; + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + return "command input length error"; + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + return "command ouput length error"; + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return "reserved fields not cleared"; + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + return "bad command descriptor type"; + default: + return "unknown status code"; + } +} + +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); + + return be16_to_cpu(hdr->opcode); +} + +static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + struct mlx5_cmd *cmd = &dev->cmd; + int err; + + if (cmd->mode == CMD_MODE_POLLING) { + wait_for_completion(&ent->done); + err = ent->ret; + } else { + if (!wait_for_completion_timeout(&ent->done, timeout)) + err = -ETIMEDOUT; + else + err = 0; + } + if (err == -ETIMEDOUT) { + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + } + mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, + deliv_status_to_str(ent->status), ent->status); + + return err; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. page queue commands do not support asynchrous completion + */ +static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback, + void *context, int page_queue, u8 *status) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + ktime_t t1, t2, delta; + struct mlx5_cmd_stats *stats; + int err = 0; + s64 ds; + u16 op; + + if (callback && page_queue) + return -EINVAL; + + ent = alloc_cmd(cmd, in, out, callback, context, page_queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + if (!callback) + init_completion(&ent->done); + + INIT_WORK(&ent->work, cmd_work_handler); + if (page_queue) { + cmd_work_handler(&ent->work); + } else if (!queue_work(cmd->wq, &ent->work)) { + mlx5_core_warn(dev, "failed to queue work\n"); + err = -ENOMEM; + goto out_free; + } + + if (!callback) { + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out; + + t1 = timespec_to_ktime(ent->ts1); + t2 = timespec_to_ktime(ent->ts2); + delta = ktime_sub(t2, t1); + ds = ktime_to_ns(delta); + op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[op]; + spin_lock(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; + free_cmd(ent); + } + + return err; + +out_free: + free_cmd(ent); +out: + return err; +} + +static ssize_t dbg_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char lbuf[3]; + int err; + + if (!dbg->in_msg || !dbg->out_msg) + return -ENOMEM; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EPERM; + + lbuf[sizeof(lbuf) - 1] = 0; + + if (strcmp(lbuf, "go")) + return -EINVAL; + + err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen); + + return err ? err : count; +} + + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dbg_write, +}; + +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(to->first.data)); + memcpy(to->first.data, from, copy); + size -= copy; + from += copy; + + next = to->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + memcpy(block->data, from, copy); + from += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(from->first.data)); + memcpy(to, from->first.data, copy); + size -= copy; + to += copy; + + next = from->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + if (xor8_buf(block, sizeof(*block)) != 0xff) + return -EINVAL; + + memcpy(to, block->data, copy); + to += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, + gfp_t flags) +{ + struct mlx5_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), flags); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, flags, + &mailbox->dma); + if (!mailbox->buf) { + mlx5_core_dbg(dev, "failed allocation\n"); + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + memset(mailbox->buf, 0, sizeof(struct mlx5_cmd_prot_block)); + mailbox->next = NULL; + + return mailbox; +} + +static void free_cmd_box(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *mailbox) +{ + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, + gfp_t flags, int size) +{ + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_msg *msg; + int blen; + int err; + int n; + int i; + + msg = kzalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return ERR_PTR(-ENOMEM); + + blen = size - min_t(int, sizeof(msg->first.data), size); + n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) / MLX5_CMD_DATA_BLOCK_SIZE; + + for (i = 0; i < n; i++) { + tmp = alloc_cmd_box(dev, flags); + if (IS_ERR(tmp)) { + mlx5_core_warn(dev, "failed allocating block\n"); + err = PTR_ERR(tmp); + goto err_alloc; + } + + block = tmp->buf; + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); + head = tmp; + } + msg->next = head; + msg->len = size; + return msg; + +err_alloc: + while (head) { + tmp = head->next; + free_cmd_box(dev, head); + head = tmp; + } + kfree(msg); + + return ERR_PTR(err); +} + +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *head = msg->next; + struct mlx5_cmd_mailbox *next; + + while (head) { + next = head->next; + free_cmd_box(dev, head); + head = next; + } + kfree(msg); +} + +static ssize_t data_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + void *ptr; + int err; + + if (*pos != 0) + return -EINVAL; + + kfree(dbg->in_msg); + dbg->in_msg = NULL; + dbg->inlen = 0; + + ptr = kzalloc(count, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + if (copy_from_user(ptr, buf, count)) { + err = -EPERM; + goto out; + } + dbg->in_msg = ptr; + dbg->inlen = count; + + *pos = count; + + return count; + +out: + kfree(ptr); + return err; +} + +static ssize_t data_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + int copy; + + if (*pos) + return 0; + + if (!dbg->out_msg) + return -ENOMEM; + + copy = min_t(int, count, dbg->outlen); + if (copy_to_user(buf, dbg->out_msg, copy)) + return -EPERM; + + *pos += copy; + + return copy; +} + +static const struct file_operations dfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, +}; + +static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen[8]; + int err; + + if (*pos) + return 0; + + err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen); + if (err < 0) + return err; + + if (copy_to_user(buf, &outlen, err)) + return -EPERM; + + *pos += err; + + return err; +} + +static ssize_t outlen_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen_str[8]; + int outlen; + void *ptr; + int err; + + if (*pos != 0 || count > 6) + return -EINVAL; + + kfree(dbg->out_msg); + dbg->out_msg = NULL; + dbg->outlen = 0; + + if (copy_from_user(outlen_str, buf, count)) + return -EPERM; + + outlen_str[7] = 0; + + err = sscanf(outlen_str, "%d", &outlen); + if (err < 0) + return err; + + ptr = kzalloc(outlen, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + dbg->out_msg = ptr; + dbg->outlen = outlen; + + *pos = count; + + return count; +} + +static const struct file_operations olfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = outlen_write, + .read = outlen_read, +}; + +static void set_wqname(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s", + dev_name(&dev->pdev->dev)); +} + +static void clean_debug_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!mlx5_debugfs_root) + return; + + mlx5_cmdif_debugfs_cleanup(dev); + debugfs_remove_recursive(dbg->dbg_root); +} + +static int create_debugfs_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + int err = -ENOMEM; + + if (!mlx5_debugfs_root) + return 0; + + dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); + if (!dbg->dbg_root) + return err; + + dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root, + dev, &dfops); + if (!dbg->dbg_in) + goto err_dbg; + + dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root, + dev, &dfops); + if (!dbg->dbg_out) + goto err_dbg; + + dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root, + dev, &olfops); + if (!dbg->dbg_outlen) + goto err_dbg; + + dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root, + &dbg->status); + if (!dbg->dbg_status) + goto err_dbg; + + dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); + if (!dbg->dbg_run) + goto err_dbg; + + mlx5_cmdif_debugfs_init(dev); + + return 0; + +err_dbg: + clean_debug_files(dev); + return err; +} + +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + + down(&cmd->pages_sem); + + flush_workqueue(cmd->wq); + + cmd->mode = CMD_MODE_EVENTS; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + + down(&cmd->pages_sem); + + flush_workqueue(cmd->wq); + cmd->mode = CMD_MODE_POLLING; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + mlx5_cmd_cbk_t callback; + void *context; + int err; + int i; + + for (i = 0; i < (1 << cmd->log_sz); i++) { + if (test_bit(i, &vector)) { + ent = cmd->ent_arr[i]; + ktime_get_ts(&ent->ts2); + memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); + dump_command(dev, ent, 0); + if (!ent->ret) { + if (!cmd->checksum_disabled) + ent->ret = verify_signature(ent); + else + ent->ret = 0; + ent->status = ent->lay->status_own >> 1; + mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", + ent->ret, deliv_status_to_str(ent->status), ent->status); + } + free_ent(cmd, ent->idx); + if (ent->callback) { + callback = ent->callback; + context = ent->context; + err = ent->ret; + free_cmd(ent); + callback(err, context); + } else { + complete(&ent->done); + } + if (ent->page_queue) + up(&cmd->pages_sem); + else + up(&cmd->sem); + } + } +} +EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +static int status_to_err(u8 status) +{ + return status ? -1 : 0; /* TBD more meaningful codes */ +} + +static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size) +{ + struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); + struct mlx5_cmd *cmd = &dev->cmd; + struct cache_ent *ent = NULL; + + if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE) + ent = &cmd->cache.large; + else if (in_size > 16 && in_size <= MED_LIST_SIZE) + ent = &cmd->cache.med; + + if (ent) { + spin_lock(&ent->lock); + if (!list_empty(&ent->head)) { + msg = list_entry(ent->head.next, typeof(*msg), list); + /* For cached lists, we must explicitly state what is + * the real size + */ + msg->len = in_size; + list_del(&msg->list); + } + spin_unlock(&ent->lock); + } + + if (IS_ERR(msg)) + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size); + + return msg; +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) +{ + if (msg->cache) { + spin_lock(&msg->cache->lock); + list_add_tail(&msg->list, &msg->cache->head); + spin_unlock(&msg->cache->lock); + } else { + mlx5_free_cmd_msg(dev, msg); + } +} + +static int is_manage_pages(struct mlx5_inbox_hdr *in) +{ + return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; +} + +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size) +{ + struct mlx5_cmd_msg *inb; + struct mlx5_cmd_msg *outb; + int pages_queue; + int err; + u8 status = 0; + + pages_queue = is_manage_pages(in); + + inb = alloc_msg(dev, in_size); + if (IS_ERR(inb)) { + err = PTR_ERR(inb); + return err; + } + + err = mlx5_copy_to_msg(inb, in, in_size); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + + outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status); + if (err) + goto out_out; + + mlx5_core_dbg(dev, "err %d, status %d\n", err, status); + if (status) { + err = status_to_err(status); + goto out_out; + } + + err = mlx5_copy_from_msg(out, outb, out_size); + +out_out: + mlx5_free_cmd_msg(dev, outb); + +out_in: + free_msg(dev, inb); + return err; +} +EXPORT_SYMBOL(mlx5_cmd_exec); + +static void destroy_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_msg *msg; + struct mlx5_cmd_msg *n; + + list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } + + list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } +} + +static int create_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_msg *msg; + int err; + int i; + + spin_lock_init(&cmd->cache.large.lock); + INIT_LIST_HEAD(&cmd->cache.large.head); + spin_lock_init(&cmd->cache.med.lock); + INIT_LIST_HEAD(&cmd->cache.med.head); + + for (i = 0; i < NUM_LONG_LISTS; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; + } + msg->cache = &cmd->cache.large; + list_add_tail(&msg->list, &cmd->cache.large.head); + } + + for (i = 0; i < NUM_MED_LISTS; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; + } + msg->cache = &cmd->cache.med; + list_add_tail(&msg->list, &cmd->cache.med.head); + } + + return 0; + +ex_err: + destroy_msg_cache(dev); + return err; +} + +int mlx5_cmd_init(struct mlx5_core_dev *dev) +{ + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); + struct mlx5_cmd *cmd = &dev->cmd; + u32 cmd_h, cmd_l; + u16 cmd_if_rev; + int err; + int i; + + cmd_if_rev = cmdif_rev(dev); + if (cmd_if_rev != CMD_IF_REV) { + dev_err(&dev->pdev->dev, + "Driver cmdif rev(%d) differs from firmware's(%d)\n", + CMD_IF_REV, cmd_if_rev); + return -EINVAL; + } + + cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + cmd->cmd_buf = (void *)__get_free_pages(GFP_ATOMIC, 0); + if (!cmd->cmd_buf) { + err = -ENOMEM; + goto err_free_pool; + } + cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) { + err = -ENOMEM; + goto err_free; + } + + cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; + cmd->log_sz = cmd_l >> 4 & 0xf; + cmd->log_stride = cmd_l & 0xf; + if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) { + dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n", + 1 << cmd->log_sz); + err = -EINVAL; + goto err_map; + } + + if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) { + dev_err(&dev->pdev->dev, "command queue size overflow\n"); + err = -EINVAL; + goto err_map; + } + + cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; + cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; + + cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; + if (cmd->cmdif_rev > CMD_IF_REV) { + dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", + CMD_IF_REV, cmd->cmdif_rev); + err = -ENOTSUPP; + goto err_map; + } + + spin_lock_init(&cmd->alloc_lock); + spin_lock_init(&cmd->token_lock); + for (i = 0; i < ARRAY_SIZE(cmd->stats); i++) + spin_lock_init(&cmd->stats[i].lock); + + sema_init(&cmd->sem, cmd->max_reg_cmds); + sema_init(&cmd->pages_sem, 1); + + cmd_h = (u32)((u64)(cmd->dma) >> 32); + cmd_l = (u32)(cmd->dma); + if (cmd_l & 0xfff) { + dev_err(&dev->pdev->dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_map; + } + + iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); + iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); + + /* Make sure firmware sees the complete address before we proceed */ + wmb(); + + mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); + + cmd->mode = CMD_MODE_POLLING; + + err = create_msg_cache(dev); + if (err) { + dev_err(&dev->pdev->dev, "failed to create command cache\n"); + goto err_map; + } + + set_wqname(dev); + cmd->wq = create_singlethread_workqueue(cmd->wq_name); + if (!cmd->wq) { + dev_err(&dev->pdev->dev, "failed to create command workqueue\n"); + err = -ENOMEM; + goto err_cache; + } + + err = create_debugfs_files(dev); + if (err) { + err = -ENOMEM; + goto err_wq; + } + + return 0; + +err_wq: + destroy_workqueue(cmd->wq); + +err_cache: + destroy_msg_cache(dev); + +err_map: + dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); +err_free: + free_pages((unsigned long)cmd->cmd_buf, 0); + +err_free_pool: + pci_pool_destroy(cmd->pool); + + return err; +} +EXPORT_SYMBOL(mlx5_cmd_init); + +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + clean_debug_files(dev); + destroy_workqueue(cmd->wq); + destroy_msg_cache(dev); + dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)cmd->cmd_buf, 0); + pci_pool_destroy(cmd->pool); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup); + +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr) +{ + if (!hdr->status) + return 0; + + pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n", + cmd_status_str(hdr->status), hdr->status, + be32_to_cpu(hdr->syndrome)); + + switch (hdr->status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c new file mode 100644 index 000000000000..c2d660be6f76 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) +{ + struct mlx5_core_cq *cq; + struct mlx5_cq_table *table = &dev->priv.cq_table; + + spin_lock(&table->lock); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + atomic_inc(&cq->refcount); + spin_unlock(&table->lock); + + if (!cq) { + mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn); + return; + } + + ++cq->arm_sn; + + cq->comp(cq); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + +void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) +{ + struct mlx5_cq_table *table = &dev->priv.cq_table; + struct mlx5_core_cq *cq; + + spin_lock(&table->lock); + + cq = radix_tree_lookup(&table->tree, cqn); + if (cq) + atomic_inc(&cq->refcount); + + spin_unlock(&table->lock); + + if (!cq) { + mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn); + return; + } + + cq->event(cq, event_type); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + + +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_create_cq_mbox_in *in, int inlen) +{ + int err; + struct mlx5_cq_table *table = &dev->priv.cq_table; + struct mlx5_create_cq_mbox_out out; + struct mlx5_destroy_cq_mbox_in din; + struct mlx5_destroy_cq_mbox_out dout; + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); + memset(&out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + cq->cqn = be32_to_cpu(out.cqn) & 0xffffff; + cq->cons_index = 0; + cq->arm_sn = 0; + atomic_set(&cq->refcount, 1); + init_completion(&cq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, cq->cqn, cq); + spin_unlock_irq(&table->lock); + if (err) + goto err_cmd; + + cq->pid = current->pid; + err = mlx5_debug_cq_add(dev, cq); + if (err) + mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", + cq->cqn); + + return 0; + +err_cmd: + memset(&din, 0, sizeof(din)); + memset(&dout, 0, sizeof(dout)); + din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); + mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL(mlx5_core_create_cq); + +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &dev->priv.cq_table; + struct mlx5_destroy_cq_mbox_in in; + struct mlx5_destroy_cq_mbox_out out; + struct mlx5_core_cq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, cq->cqn); + spin_unlock_irq(&table->lock); + if (!tmp) { + mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn); + return -EINVAL; + } + if (tmp != cq) { + mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn); + return -EINVAL; + } + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); + in.cqn = cpu_to_be32(cq->cqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + synchronize_irq(cq->irqn); + + mlx5_debug_cq_remove(dev, cq); + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); + wait_for_completion(&cq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_cq); + +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_query_cq_mbox_out *out) +{ + struct mlx5_query_cq_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, sizeof(*out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ); + in.cqn = cpu_to_be32(cq->cqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_query_cq); + + +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int type, struct mlx5_cq_modify_params *params) +{ + return -ENOSYS; +} + +int mlx5_init_cq_table(struct mlx5_core_dev *dev) +{ + struct mlx5_cq_table *table = &dev->priv.cq_table; + int err; + + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + err = mlx5_cq_debugfs_init(dev); + + return err; +} + +void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev) +{ + mlx5_cq_debugfs_cleanup(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c new file mode 100644 index 000000000000..5e9cf2b9aaf7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +enum { + QP_PID, + QP_STATE, + QP_XPORT, + QP_MTU, + QP_N_RECV, + QP_RECV_SZ, + QP_N_SEND, + QP_LOG_PG_SZ, + QP_RQPN, +}; + +static char *qp_fields[] = { + [QP_PID] = "pid", + [QP_STATE] = "state", + [QP_XPORT] = "transport", + [QP_MTU] = "mtu", + [QP_N_RECV] = "num_recv", + [QP_RECV_SZ] = "rcv_wqe_sz", + [QP_N_SEND] = "num_send", + [QP_LOG_PG_SZ] = "log2_page_sz", + [QP_RQPN] = "remote_qpn", +}; + +enum { + EQ_NUM_EQES, + EQ_INTR, + EQ_LOG_PG_SZ, +}; + +static char *eq_fields[] = { + [EQ_NUM_EQES] = "num_eqes", + [EQ_INTR] = "intr", + [EQ_LOG_PG_SZ] = "log_page_size", +}; + +enum { + CQ_PID, + CQ_NUM_CQES, + CQ_LOG_PG_SZ, +}; + +static char *cq_fields[] = { + [CQ_PID] = "pid", + [CQ_NUM_CQES] = "num_cqes", + [CQ_LOG_PG_SZ] = "log_page_size", +}; + +struct dentry *mlx5_debugfs_root; +EXPORT_SYMBOL(mlx5_debugfs_root); + +void mlx5_register_debugfs(void) +{ + mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL); + if (IS_ERR_OR_NULL(mlx5_debugfs_root)) + mlx5_debugfs_root = NULL; +} + +void mlx5_unregister_debugfs(void) +{ + debugfs_remove(mlx5_debugfs_root); +} + +int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + atomic_set(&dev->num_qps, 0); + + dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root); + if (!dev->priv.qp_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.qp_debugfs); +} + +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root); + if (!dev->priv.eq_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.eq_debugfs); +} + +static ssize_t average_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + u64 field = 0; + int ret; + int err; + char tbuf[22]; + + if (*pos) + return 0; + + stats = filp->private_data; + spin_lock(&stats->lock); + if (stats->n) + field = stats->sum / stats->n; + spin_unlock(&stats->lock); + ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); + if (ret > 0) { + err = copy_to_user(buf, tbuf, ret); + if (err) + return err; + } + + *pos += ret; + return ret; +} + + +static ssize_t average_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + + stats = filp->private_data; + spin_lock(&stats->lock); + stats->sum = 0; + stats->n = 0; + spin_unlock(&stats->lock); + + *pos += count; + + return count; +} + +static const struct file_operations stats_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = average_read, + .write = average_write, +}; + +int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_stats *stats; + struct dentry **cmd; + const char *namep; + int err; + int i; + + if (!mlx5_debugfs_root) + return 0; + + cmd = &dev->priv.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); + if (!*cmd) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) { + stats = &dev->cmd.stats[i]; + namep = mlx5_command_str(i); + if (strcmp(namep, "unknown command opcode")) { + stats->root = debugfs_create_dir(namep, *cmd); + if (!stats->root) { + mlx5_core_warn(dev, "failed adding command %d\n", + i); + err = -ENOMEM; + goto out; + } + + stats->avg = debugfs_create_file("average", 0400, + stats->root, stats, + &stats_fops); + if (!stats->avg) { + mlx5_core_warn(dev, "failed creating debugfs file\n"); + err = -ENOMEM; + goto out; + } + + stats->count = debugfs_create_u64("n", 0400, + stats->root, + &stats->n); + if (!stats->count) { + mlx5_core_warn(dev, "failed creating debugfs file\n"); + err = -ENOMEM; + goto out; + } + } + } + + return 0; +out: + debugfs_remove_recursive(dev->priv.cmdif_debugfs); + return err; +} + +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.cmdif_debugfs); +} + +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root); + if (!dev->priv.cq_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.cq_debugfs); +} + +static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + int index) +{ + struct mlx5_query_qp_mbox_out *out; + struct mlx5_qp_context *ctx; + u64 param = 0; + int err; + int no_sq; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_qp_query(dev, qp, out, sizeof(*out)); + if (err) { + mlx5_core_warn(dev, "failed to query qp\n"); + goto out; + } + + ctx = &out->ctx; + switch (index) { + case QP_PID: + param = qp->pid; + break; + case QP_STATE: + param = be32_to_cpu(ctx->flags) >> 28; + break; + case QP_XPORT: + param = (be32_to_cpu(ctx->flags) >> 16) & 0xff; + break; + case QP_MTU: + param = ctx->mtu_msgmax >> 5; + break; + case QP_N_RECV: + param = 1 << ((ctx->rq_size_stride >> 3) & 0xf); + break; + case QP_RECV_SZ: + param = 1 << ((ctx->rq_size_stride & 7) + 4); + break; + case QP_N_SEND: + no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15; + if (!no_sq) + param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11); + else + param = 0; + break; + case QP_LOG_PG_SZ: + param = ((cpu_to_be32(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f); + param += 12; + break; + case QP_RQPN: + param = cpu_to_be32(ctx->log_pg_sz_remote_qpn) & 0xffffff; + break; + } + +out: + kfree(out); + return param; +} + +static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int index) +{ + struct mlx5_query_eq_mbox_out *out; + struct mlx5_eq_context *ctx; + u64 param = 0; + int err; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return param; + + ctx = &out->ctx; + + err = mlx5_core_eq_query(dev, eq, out, sizeof(*out)); + if (err) { + mlx5_core_warn(dev, "failed to query eq\n"); + goto out; + } + + switch (index) { + case EQ_NUM_EQES: + param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + break; + case EQ_INTR: + param = ctx->intr; + break; + case EQ_LOG_PG_SZ: + param = (ctx->log_page_size & 0x1f) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int index) +{ + struct mlx5_query_cq_mbox_out *out; + struct mlx5_cq_context *ctx; + u64 param = 0; + int err; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return param; + + ctx = &out->ctx; + + err = mlx5_core_query_cq(dev, cq, out); + if (err) { + mlx5_core_warn(dev, "failed to query cq\n"); + goto out; + } + + switch (index) { + case CQ_PID: + param = cq->pid; + break; + case CQ_NUM_CQES: + param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + break; + case CQ_LOG_PG_SZ: + param = (ctx->log_pg_sz & 0x1f) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_field_desc *desc; + struct mlx5_rsc_debug *d; + char tbuf[18]; + u64 field; + int ret; + int err; + + if (*pos) + return 0; + + desc = filp->private_data; + d = (void *)(desc - desc->i) - sizeof(*d); + switch (d->type) { + case MLX5_DBG_RSC_QP: + field = qp_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_EQ: + field = eq_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_CQ: + field = cq_read_field(d->dev, d->object, desc->i); + break; + + default: + mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type); + return -EINVAL; + } + + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + if (ret > 0) { + err = copy_to_user(buf, tbuf, ret); + if (err) + return err; + } + + *pos += ret; + return ret; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = dbg_read, +}; + +static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, + struct dentry *root, struct mlx5_rsc_debug **dbg, + int rsn, char **field, int nfile, void *data) +{ + struct mlx5_rsc_debug *d; + char resn[32]; + int err; + int i; + + d = kzalloc(sizeof(*d) + nfile * sizeof(d->fields[0]), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->dev = dev; + d->object = data; + d->type = type; + sprintf(resn, "0x%x", rsn); + d->root = debugfs_create_dir(resn, root); + if (!d->root) { + err = -ENOMEM; + goto out_free; + } + + for (i = 0; i < nfile; i++) { + d->fields[i].i = i; + d->fields[i].dent = debugfs_create_file(field[i], 0400, + d->root, &d->fields[i], + &fops); + if (!d->fields[i].dent) { + err = -ENOMEM; + goto out_rem; + } + } + *dbg = d; + + return 0; +out_rem: + debugfs_remove_recursive(d->root); + +out_free: + kfree(d); + return err; +} + +static void rem_res_tree(struct mlx5_rsc_debug *d) +{ + debugfs_remove_recursive(d->root); + kfree(d); +} + +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs, + &qp->dbg, qp->qpn, qp_fields, + ARRAY_SIZE(qp_fields), qp); + if (err) + qp->dbg = NULL; + + return err; +} + +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + if (!mlx5_debugfs_root) + return; + + if (qp->dbg) + rem_res_tree(qp->dbg); +} + + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs, + &eq->dbg, eq->eqn, eq_fields, + ARRAY_SIZE(eq_fields), eq); + if (err) + eq->dbg = NULL; + + return err; +} + +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + if (!mlx5_debugfs_root) + return; + + if (eq->dbg) + rem_res_tree(eq->dbg); +} + +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs, + &cq->dbg, cq->cqn, cq_fields, + ARRAY_SIZE(cq_fields), cq); + if (err) + cq->dbg = NULL; + + return err; +} + +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + if (!mlx5_debugfs_root) + return; + + if (cq->dbg) + rem_res_tree(cq->dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c new file mode 100644 index 000000000000..c02cbcfd0fb8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -0,0 +1,521 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" + +enum { + MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), + MLX5_EQE_OWNER_INIT_VAL = 0x1, +}; + +enum { + MLX5_EQ_STATE_ARMED = 0x9, + MLX5_EQ_STATE_FIRED = 0xa, + MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, +}; + +enum { + MLX5_NUM_SPARE_EQE = 0x80, + MLX5_NUM_ASYNC_EQE = 0x100, + MLX5_NUM_CMD_EQE = 32, +}; + +enum { + MLX5_EQ_DOORBEL_OFFSET = 0x40, +}; + +#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) + +struct map_eq_in { + u64 mask; + u32 reserved; + u32 unmap_eqn; +}; + +struct cre_des_eq { + u8 reserved[15]; + u8 eqn; +}; + +static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) +{ + struct mlx5_destroy_eq_mbox_in in; + struct mlx5_destroy_eq_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ); + in.eqn = eqn; + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (!err) + goto ex; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + +ex: + return err; +} + +static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + default: + return "Unrecognized event"; + } +} + +static enum mlx5_dev_event port_subtype_event(u8 subtype) +{ + switch (subtype) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + return MLX5_DEV_EVENT_PORT_DOWN; + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + return MLX5_DEV_EVENT_PORT_UP; + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + return MLX5_DEV_EVENT_PORT_INITIALIZED; + case MLX5_PORT_CHANGE_SUBTYPE_LID: + return MLX5_DEV_EVENT_LID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + return MLX5_DEV_EVENT_PKEY_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + return MLX5_DEV_EVENT_GUID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + return MLX5_DEV_EVENT_CLIENT_REREG; + } + return -1; +} + +static void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + __raw_writel((__force u32) cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int eqes_found = 0; + int set_ci = 0; + u32 cqn; + u32 srqn; + u8 port; + + while ((eqe = next_eqe_sw(eq))) { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + rmb(); + + mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); + switch (eqe->type) { + case MLX5_EVENT_TYPE_COMP: + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + mlx5_cq_completion(dev, cqn); + break; + + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + mlx5_core_dbg(dev, "event %s(%d) arrived\n", + eqe_type_str(eqe->type), eqe->type); + mlx5_qp_event(dev, be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff, + eqe->type); + break; + + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", + eqe_type_str(eqe->type), eqe->type, srqn); + mlx5_srq_event(dev, srqn, eqe->type); + break; + + case MLX5_EVENT_TYPE_CMD: + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + break; + + case MLX5_EVENT_TYPE_PORT_CHANGE: + port = (eqe->data.port.port >> 4) & 0xf; + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_LID: + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + dev->event(dev, port_subtype_event(eqe->sub_type), &port); + break; + default: + mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", + port, eqe->sub_type); + } + break; + case MLX5_EVENT_TYPE_CQ_ERROR: + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + mlx5_cq_event(dev, cqn, eqe->type); + break; + + case MLX5_EVENT_TYPE_PAGE_REQUEST: + { + u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); + s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages); + + mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages); + mlx5_core_req_pages_handler(dev, func_id, npages); + } + break; + + + default: + mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); + break; + } + + ++eq->cons_index; + eqes_found = 1; + ++set_ci; + + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } + } + + eq_update_ci(eq, 1); + + return eqes_found; +} + +static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr) +{ + struct mlx5_eq *eq = eq_ptr; + struct mlx5_core_dev *dev = eq->dev; + + mlx5_eq_int(dev, eq); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +static void init_eq_buf(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int i; + + for (i = 0; i < eq->nent; i++) { + eqe = get_eqe(eq, i); + eqe->owner = MLX5_EQE_OWNER_INIT_VAL; + } +} + +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, struct mlx5_uar *uar) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_create_eq_mbox_in *in; + struct mlx5_create_eq_mbox_out out; + int err; + int inlen; + + eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE, + &eq->buf); + if (err) + return err; + + init_eq_buf(eq); + + inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err_buf; + } + memset(&out, 0, sizeof(out)); + + mlx5_fill_page_array(&eq->buf, in->pas); + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ); + in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index); + in->ctx.intr = vecidx; + in->ctx.log_page_size = PAGE_SHIFT - 12; + in->events_mask = cpu_to_be64(mask); + + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) + goto err_in; + + if (out.hdr.status) { + err = mlx5_cmd_status_to_err(&out.hdr); + goto err_in; + } + + eq->eqn = out.eq_number; + err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, + name, eq); + if (err) + goto err_eq; + + eq->irqn = vecidx; + eq->dev = dev; + eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; + + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_irq; + + /* EQs are created in ARMED state + */ + eq_update_ci(eq, 1); + + mlx5_vfree(in); + return 0; + +err_irq: + free_irq(table->msix_arr[vecidx].vector, eq); + +err_eq: + mlx5_cmd_destroy_eq(dev, eq->eqn); + +err_in: + mlx5_vfree(in); + +err_buf: + mlx5_buf_free(dev, &eq->buf); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_create_map_eq); + +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + + mlx5_debug_eq_remove(dev, eq); + free_irq(table->msix_arr[eq->irqn].vector, eq); + err = mlx5_cmd_destroy_eq(dev, eq->eqn); + if (err) + mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", + eq->eqn); + mlx5_buf_free(dev, &eq->buf); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); + +int mlx5_eq_init(struct mlx5_core_dev *dev) +{ + int err; + + spin_lock_init(&dev->priv.eq_table.lock); + + err = mlx5_eq_debugfs_init(dev); + + return err; +} + + +void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +{ + mlx5_eq_debugfs_cleanup(dev); +} + +int mlx5_start_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, + MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, + "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); + if (err) { + mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); + return err; + } + + mlx5_cmd_use_events(dev); + + err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, + MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK, + "mlx5_async_eq", &dev->priv.uuari.uars[0]); + if (err) { + mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + goto err1; + } + + err = mlx5_create_map_eq(dev, &table->pages_eq, + MLX5_EQ_VEC_PAGES, + dev->caps.max_vf + 1, + 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", + &dev->priv.uuari.uars[0]); + if (err) { + mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); + goto err2; + } + + return err; + +err2: + mlx5_destroy_unmap_eq(dev, &table->async_eq); + +err1: + mlx5_cmd_use_polling(dev); + mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + return err; +} + +int mlx5_stop_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + + err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + if (err) + return err; + + mlx5_destroy_unmap_eq(dev, &table->async_eq); + mlx5_cmd_use_polling(dev); + + err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + if (err) + mlx5_cmd_use_events(dev); + + return err; +} + +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_query_eq_mbox_out *out, int outlen) +{ + struct mlx5_query_eq_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, outlen); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ); + in.eqn = eq->eqn; + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) + return err; + + if (out->hdr.status) + err = mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_eq_query); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c new file mode 100644 index 000000000000..72a5222447f5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_query_adapter_mbox_out *out; + struct mlx5_cmd_query_adapter_mbox_in in; + int err; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(&in, 0, sizeof(in)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + goto out_out; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out_out; + } + + memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid)); + +out_out: + kfree(out); + + return err; +} + +int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, + struct mlx5_caps *caps) +{ + struct mlx5_cmd_query_hca_cap_mbox_out *out; + struct mlx5_cmd_query_hca_cap_mbox_in in; + struct mlx5_query_special_ctxs_mbox_out ctx_out; + struct mlx5_query_special_ctxs_mbox_in ctx_in; + int err; + u16 t16; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(&in, 0, sizeof(in)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP); + in.hdr.opmod = cpu_to_be16(0x1); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + goto out_out; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out_out; + } + + + caps->log_max_eq = out->hca_cap.log_max_eq & 0xf; + caps->max_cqes = 1 << out->hca_cap.log_max_cq_sz; + caps->max_wqes = 1 << out->hca_cap.log_max_qp_sz; + caps->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq); + caps->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq); + caps->flags = be64_to_cpu(out->hca_cap.flags); + caps->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support); + caps->log_max_msg = out->hca_cap.log_max_msg & 0x1f; + caps->num_ports = out->hca_cap.num_ports & 0xf; + caps->log_max_cq = out->hca_cap.log_max_cq & 0x1f; + if (caps->num_ports > MLX5_MAX_PORTS) { + mlx5_core_err(dev, "device has %d ports while the driver supports max %d ports\n", + caps->num_ports, MLX5_MAX_PORTS); + err = -EINVAL; + goto out_out; + } + caps->log_max_qp = out->hca_cap.log_max_qp & 0x1f; + caps->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f; + caps->log_max_pd = out->hca_cap.log_max_pd & 0x1f; + caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f; + caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f; + caps->log_max_mcg = out->hca_cap.log_max_mcg; + caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg); + caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f); + caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f); + caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz; + t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size); + if (t16 & 0x8000) { + caps->bf_reg_size = 1 << (t16 & 0x1f); + caps->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE; + } else { + caps->bf_reg_size = 0; + caps->bf_regs_per_page = 0; + } + caps->min_page_sz = ~(u32)((1 << out->hca_cap.log_pg_sz) - 1); + + memset(&ctx_in, 0, sizeof(ctx_in)); + memset(&ctx_out, 0, sizeof(ctx_out)); + ctx_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, &ctx_in, sizeof(ctx_in), + &ctx_out, sizeof(ctx_out)); + if (err) + goto out_out; + + if (ctx_out.hdr.status) + err = mlx5_cmd_status_to_err(&ctx_out.hdr); + + caps->reserved_lkey = be32_to_cpu(ctx_out.reserved_lkey); + +out_out: + kfree(out); + + return err; +} + +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_init_hca_mbox_in in; + struct mlx5_cmd_init_hca_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} + +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_teardown_hca_mbox_in in; + struct mlx5_cmd_teardown_hca_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c new file mode 100644 index 000000000000..ea4b9bca6d4a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +enum { + MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, + MAX_MISSES = 3, +}; + +enum { + MLX5_HEALTH_SYNDR_FW_ERR = 0x1, + MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, + MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, + MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, + MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, + MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, + MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, + MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, +}; + +static DEFINE_SPINLOCK(health_lock); + +static LIST_HEAD(health_list); +static struct work_struct health_work; + +static health_handler_t reg_handler; +int mlx5_register_health_report_handler(health_handler_t handler) +{ + spin_lock_irq(&health_lock); + if (reg_handler) { + spin_unlock_irq(&health_lock); + return -EEXIST; + } + reg_handler = handler; + spin_unlock_irq(&health_lock); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_health_report_handler); + +void mlx5_unregister_health_report_handler(void) +{ + spin_lock_irq(&health_lock); + reg_handler = NULL; + spin_unlock_irq(&health_lock); +} +EXPORT_SYMBOL(mlx5_unregister_health_report_handler); + +static void health_care(struct work_struct *work) +{ + struct mlx5_core_health *health, *n; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + LIST_HEAD(tlist); + + spin_lock_irq(&health_lock); + list_splice_init(&health_list, &tlist); + + spin_unlock_irq(&health_lock); + + list_for_each_entry_safe(health, n, &tlist, list) { + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + mlx5_core_warn(dev, "handling bad device here\n"); + spin_lock_irq(&health_lock); + if (reg_handler) + reg_handler(dev->pdev, health->health, + sizeof(health->health)); + + list_del_init(&health->list); + spin_unlock_irq(&health_lock); + } +} + +static const char *hsynd_str(u8 synd) +{ + switch (synd) { + case MLX5_HEALTH_SYNDR_FW_ERR: + return "firmware internal error"; + case MLX5_HEALTH_SYNDR_IRISC_ERR: + return "irisc not responding"; + case MLX5_HEALTH_SYNDR_CRC_ERR: + return "firmware CRC error"; + case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: + return "ICM fetch PCI error"; + case MLX5_HEALTH_SYNDR_HW_FTL_ERR: + return "HW fatal error\n"; + case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: + return "async EQ buffer overrun"; + case MLX5_HEALTH_SYNDR_EQ_ERR: + return "EQ error"; + case MLX5_HEALTH_SYNDR_FFSER_ERR: + return "FFSER error"; + default: + return "unrecognized error"; + } +} + +static void print_health_info(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + int i; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) + pr_info("assert_var[%d] 0x%08x\n", i, be32_to_cpu(h->assert_var[i])); + + pr_info("assert_exit_ptr 0x%08x\n", be32_to_cpu(h->assert_exit_ptr)); + pr_info("assert_callra 0x%08x\n", be32_to_cpu(h->assert_callra)); + pr_info("fw_ver 0x%08x\n", be32_to_cpu(h->fw_ver)); + pr_info("hw_id 0x%08x\n", be32_to_cpu(h->hw_id)); + pr_info("irisc_index %d\n", h->irisc_index); + pr_info("synd 0x%x: %s\n", h->synd, hsynd_str(h->synd)); + pr_info("ext_sync 0x%04x\n", be16_to_cpu(h->ext_sync)); +} + +static void poll_health(unsigned long data) +{ + struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; + struct mlx5_core_health *health = &dev->priv.health; + unsigned long next; + u32 count; + + count = ioread32be(health->health_counter); + if (count == health->prev) + ++health->miss_counter; + else + health->miss_counter = 0; + + health->prev = count; + if (health->miss_counter == MAX_MISSES) { + mlx5_core_err(dev, "device's health compromised\n"); + print_health_info(dev); + spin_lock_irq(&health_lock); + list_add_tail(&health->list, &health_list); + spin_unlock_irq(&health_lock); + + queue_work(mlx5_core_wq, &health_work); + } else { + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + mod_timer(&health->timer, next); + } +} + +void mlx5_start_health_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + INIT_LIST_HEAD(&health->list); + init_timer(&health->timer); + health->health = &dev->iseg->health; + health->health_counter = &dev->iseg->health_counter; + + health->timer.data = (unsigned long)dev; + health->timer.function = poll_health; + health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); + add_timer(&health->timer); +} + +void mlx5_stop_health_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + del_timer_sync(&health->timer); + + spin_lock_irq(&health_lock); + if (!list_empty(&health->list)) + list_del_init(&health->list); + spin_unlock_irq(&health_lock); +} + +void mlx5_health_cleanup(void) +{ +} + +void __init mlx5_health_init(void) +{ + INIT_WORK(&health_work, health_care); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c new file mode 100644 index 000000000000..18d6fd5dd90b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" + +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, + u16 opmod, int port) +{ + struct mlx5_mad_ifc_mbox_in *in = NULL; + struct mlx5_mad_ifc_mbox_out *out = NULL; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) { + err = -ENOMEM; + goto out; + } + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC); + in->hdr.opmod = cpu_to_be16(opmod); + in->port = port; + + memcpy(in->data, inb, sizeof(in->data)); + + err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out)); + if (err) + goto out; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out; + } + + memcpy(outb, out->data, sizeof(out->data)); + +out: + kfree(out); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c new file mode 100644 index 000000000000..f21cc397d1bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +#define DRIVER_NAME "mlx5_core" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "June 2013" + +MODULE_AUTHOR("Eli Cohen "); +MODULE_DESCRIPTION("Mellanox ConnectX-IB HCA core library"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRIVER_VERSION); + +int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644); +MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); + +struct workqueue_struct *mlx5_core_wq; + +static int set_dma_caps(struct pci_dev *pdev) +{ + int err; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); + return err; + } + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, + "Warning: couldn't set 64-bit consistent PCI DMA mask.\n"); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "Can't set consistent PCI DMA mask, aborting.\n"); + return err; + } + } + + dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); + return err; +} + +static int request_bar(struct pci_dev *pdev) +{ + int err = 0; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing registers BAR, aborting.\n"); + return -ENODEV; + } + + err = pci_request_regions(pdev, DRIVER_NAME); + if (err) + dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); + + return err; +} + +static void release_bar(struct pci_dev *pdev) +{ + pci_release_regions(pdev); +} + +static int mlx5_enable_msix(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int num_eqs = 1 << dev->caps.log_max_eq; + int nvec; + int err; + int i; + + nvec = dev->caps.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL); + if (!table->msix_arr) + return -ENOMEM; + + for (i = 0; i < nvec; i++) + table->msix_arr[i].entry = i; + +retry: + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + err = pci_enable_msix(dev->pdev, table->msix_arr, nvec); + if (err <= 0) { + return err; + } else if (err > 2) { + nvec = err; + goto retry; + } + + mlx5_core_dbg(dev, "received %d MSI vectors out of %d requested\n", err, nvec); + + return 0; +} + +static void mlx5_disable_msix(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + + pci_disable_msix(dev->pdev); + kfree(table->msix_arr); +} + +struct mlx5_reg_host_endianess { + u8 he; + u8 rsvd[15]; +}; + +static int handle_hca_cap(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL; + struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL; + struct mlx5_cmd_query_hca_cap_mbox_in query_ctx; + struct mlx5_cmd_set_hca_cap_mbox_out set_out; + struct mlx5_profile *prof = dev->profile; + u64 flags; + int csum = 1; + int err; + + memset(&query_ctx, 0, sizeof(query_ctx)); + query_out = kzalloc(sizeof(*query_out), GFP_KERNEL); + if (!query_out) + return -ENOMEM; + + set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL); + if (!set_ctx) { + err = -ENOMEM; + goto query_ex; + } + + query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP); + query_ctx.hdr.opmod = cpu_to_be16(0x1); + err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx), + query_out, sizeof(*query_out)); + if (err) + goto query_ex; + + err = mlx5_cmd_status_to_err(&query_out->hdr); + if (err) { + mlx5_core_warn(dev, "query hca cap failed, %d\n", err); + goto query_ex; + } + + memcpy(&set_ctx->hca_cap, &query_out->hca_cap, + sizeof(set_ctx->hca_cap)); + + if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) { + csum = !!prof->cmdif_csum; + flags = be64_to_cpu(set_ctx->hca_cap.flags); + if (csum) + flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + else + flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + + set_ctx->hca_cap.flags = cpu_to_be64(flags); + } + + if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) + set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; + + memset(&set_out, 0, sizeof(set_out)); + set_ctx->hca_cap.uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12); + set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP); + err = mlx5_cmd_exec(dev, set_ctx, sizeof(*set_ctx), + &set_out, sizeof(set_out)); + if (err) { + mlx5_core_warn(dev, "set hca cap failed, %d\n", err); + goto query_ex; + } + + err = mlx5_cmd_status_to_err(&set_out.hdr); + if (err) + goto query_ex; + + if (!csum) + dev->cmd.checksum_disabled = 1; + +query_ex: + kfree(query_out); + kfree(set_ctx); + + return err; +} + +static int set_hca_ctrl(struct mlx5_core_dev *dev) +{ + struct mlx5_reg_host_endianess he_in; + struct mlx5_reg_host_endianess he_out; + int err; + + memset(&he_in, 0, sizeof(he_in)); + he_in.he = MLX5_SET_HOST_ENDIANNESS; + err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), + &he_out, sizeof(he_out), + MLX5_REG_HOST_ENDIANNESS, 0, 1); + return err; +} + +int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) +{ + struct mlx5_priv *priv = &dev->priv; + int err; + + dev->pdev = pdev; + pci_set_drvdata(dev->pdev, dev); + strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN); + priv->name[MLX5_MAX_NAME_LEN - 1] = 0; + + mutex_init(&priv->pgdir_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); + spin_lock_init(&priv->mkey_lock); + + priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); + if (!priv->dbg_root) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device, aborting.\n"); + goto err_dbg; + } + + err = request_bar(pdev); + if (err) { + dev_err(&pdev->dev, "error requesting BARs, aborting.\n"); + goto err_disable; + } + + pci_set_master(pdev); + + err = set_dma_caps(pdev); + if (err) { + dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n"); + goto err_clr_master; + } + + dev->iseg_base = pci_resource_start(dev->pdev, 0); + dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); + if (!dev->iseg) { + err = -ENOMEM; + dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n"); + goto err_clr_master; + } + dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + err = mlx5_cmd_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); + goto err_unmap; + } + + mlx5_pagealloc_init(dev); + err = set_hca_ctrl(dev); + if (err) { + dev_err(&pdev->dev, "set_hca_ctrl failed\n"); + goto err_pagealloc_cleanup; + } + + err = handle_hca_cap(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap failed\n"); + goto err_pagealloc_cleanup; + } + + err = mlx5_satisfy_startup_pages(dev); + if (err) { + dev_err(&pdev->dev, "failed to allocate startup pages\n"); + goto err_pagealloc_cleanup; + } + + err = mlx5_pagealloc_start(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); + goto err_reclaim_pages; + } + + err = mlx5_cmd_init_hca(dev); + if (err) { + dev_err(&pdev->dev, "init hca failed\n"); + goto err_pagealloc_stop; + } + + mlx5_start_health_poll(dev); + + err = mlx5_cmd_query_hca_cap(dev, &dev->caps); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto err_stop_poll; + } + + err = mlx5_cmd_query_adapter(dev); + if (err) { + dev_err(&pdev->dev, "query adapter failed\n"); + goto err_stop_poll; + } + + err = mlx5_enable_msix(dev); + if (err) { + dev_err(&pdev->dev, "enable msix failed\n"); + goto err_stop_poll; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto disable_msix; + } + + err = mlx5_alloc_uuars(dev, &priv->uuari); + if (err) { + dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); + goto err_eq_cleanup; + } + + err = mlx5_start_eqs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); + goto err_free_uar; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + mlx5_init_cq_table(dev); + mlx5_init_qp_table(dev); + mlx5_init_srq_table(dev); + + return 0; + +err_free_uar: + mlx5_free_uuars(dev, &priv->uuari); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +disable_msix: + mlx5_disable_msix(dev); + +err_stop_poll: + mlx5_stop_health_poll(dev); + mlx5_cmd_teardown_hca(dev); + +err_pagealloc_stop: + mlx5_pagealloc_stop(dev); + +err_reclaim_pages: + mlx5_reclaim_startup_pages(dev); + +err_pagealloc_cleanup: + mlx5_pagealloc_cleanup(dev); + mlx5_cmd_cleanup(dev); + +err_unmap: + iounmap(dev->iseg); + +err_clr_master: + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + +err_disable: + pci_disable_device(dev->pdev); + +err_dbg: + debugfs_remove(priv->dbg_root); + return err; +} +EXPORT_SYMBOL(mlx5_dev_init); + +void mlx5_dev_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_stop_eqs(dev); + mlx5_free_uuars(dev, &priv->uuari); + mlx5_eq_cleanup(dev); + mlx5_disable_msix(dev); + mlx5_stop_health_poll(dev); + mlx5_cmd_teardown_hca(dev); + mlx5_pagealloc_stop(dev); + mlx5_reclaim_startup_pages(dev); + mlx5_pagealloc_cleanup(dev); + mlx5_cmd_cleanup(dev); + iounmap(dev->iseg); + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + pci_disable_device(dev->pdev); + debugfs_remove(priv->dbg_root); +} +EXPORT_SYMBOL(mlx5_dev_cleanup); + +static int __init init(void) +{ + int err; + + mlx5_register_debugfs(); + mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq"); + if (!mlx5_core_wq) { + err = -ENOMEM; + goto err_debug; + } + mlx5_health_init(); + + return 0; + + mlx5_health_cleanup(); +err_debug: + mlx5_unregister_debugfs(); + return err; +} + +static void __exit cleanup(void) +{ + mlx5_health_cleanup(); + destroy_workqueue(mlx5_core_wq); + mlx5_unregister_debugfs(); +} + +module_init(init); +module_exit(cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c new file mode 100644 index 000000000000..44837640bd7c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +struct mlx5_attach_mcg_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + __be32 rsvd; + u8 gid[16]; +}; + +struct mlx5_attach_mcg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvf[8]; +}; + +struct mlx5_detach_mcg_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + __be32 rsvd; + u8 gid[16]; +}; + +struct mlx5_detach_mcg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvf[8]; +}; + +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + struct mlx5_attach_mcg_mbox_in in; + struct mlx5_attach_mcg_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG); + memcpy(in.gid, mgid, sizeof(*mgid)); + in.qpn = cpu_to_be32(qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_attach_mcg); + +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + struct mlx5_detach_mcg_mbox_in in; + struct mlx5_detach_mcg_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG); + memcpy(in.gid, mgid, sizeof(*mgid)); + in.qpn = cpu_to_be32(qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h new file mode 100644 index 000000000000..68b74e1ae1b0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_CORE_H__ +#define __MLX5_CORE_H__ + +#include +#include +#include + +extern int mlx5_core_debug_mask; + +#define mlx5_core_dbg(dev, format, arg...) \ +pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \ + current->pid, ##arg) + +#define mlx5_core_dbg_mask(dev, mask, format, arg...) \ +do { \ + if ((mask) & mlx5_core_debug_mask) \ + pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, \ + __func__, __LINE__, current->pid, ##arg); \ +} while (0) + +#define mlx5_core_err(dev, format, arg...) \ +pr_err("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \ + current->pid, ##arg) + +#define mlx5_core_warn(dev, format, arg...) \ +pr_warn("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \ + current->pid, ##arg) + +enum { + MLX5_CMD_DATA, /* print command payload only */ + MLX5_CMD_TIME, /* print command execution time */ +}; + + +int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, + struct mlx5_caps *caps); +int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); + +#endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c new file mode 100644 index 000000000000..5b44e2e46daf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_create_mkey_mbox_in *in, int inlen) +{ + struct mlx5_create_mkey_mbox_out out; + int err; + u8 key; + + memset(&out, 0, sizeof(out)); + spin_lock(&dev->priv.mkey_lock); + key = dev->priv.mkey_key++; + spin_unlock(&dev->priv.mkey_lock); + in->seg.qpn_mkey7_0 |= cpu_to_be32(key); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) { + mlx5_core_dbg(dev, "cmd exec faile %d\n", err); + return err; + } + + if (out.hdr.status) { + mlx5_core_dbg(dev, "status %d\n", out.hdr.status); + return mlx5_cmd_status_to_err(&out.hdr); + } + + mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key; + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_mkey); + +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) +{ + struct mlx5_destroy_mkey_mbox_in in; + struct mlx5_destroy_mkey_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_destroy_mkey); + +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_query_mkey_mbox_out *out, int outlen) +{ + struct mlx5_destroy_mkey_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, outlen); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_query_mkey); + +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + u32 *mkey) +{ + struct mlx5_query_special_ctxs_mbox_in in; + struct mlx5_query_special_ctxs_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + *mkey = be32_to_cpu(out.dump_fill_mkey); + + return err; +} +EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c new file mode 100644 index 000000000000..f0bf46339b28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +enum { + MLX5_PAGES_CANT_GIVE = 0, + MLX5_PAGES_GIVE = 1, + MLX5_PAGES_TAKE = 2 +}; + +struct mlx5_pages_req { + struct mlx5_core_dev *dev; + u32 func_id; + s16 npages; + struct work_struct work; +}; + +struct fw_page { + struct rb_node rb_node; + u64 addr; + struct page *page; + u16 func_id; +}; + +struct mlx5_query_pages_inbox { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_pages_outbox { + struct mlx5_outbox_hdr hdr; + u8 reserved[2]; + __be16 func_id; + __be16 init_pages; + __be16 num_pages; +}; + +struct mlx5_manage_pages_inbox { + struct mlx5_inbox_hdr hdr; + __be16 rsvd0; + __be16 func_id; + __be16 rsvd1; + __be16 num_entries; + u8 rsvd2[16]; + __be64 pas[0]; +}; + +struct mlx5_manage_pages_outbox { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[2]; + __be16 num_entries; + u8 rsvd1[20]; + __be64 pas[0]; +}; + +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) +{ + struct rb_root *root = &dev->priv.page_root; + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + struct fw_page *nfp; + struct fw_page *tfp; + + while (*new) { + parent = *new; + tfp = rb_entry(parent, struct fw_page, rb_node); + if (tfp->addr < addr) + new = &parent->rb_left; + else if (tfp->addr > addr) + new = &parent->rb_right; + else + return -EEXIST; + } + + nfp = kmalloc(sizeof(*nfp), GFP_KERNEL); + if (!nfp) + return -ENOMEM; + + nfp->addr = addr; + nfp->page = page; + nfp->func_id = func_id; + + rb_link_node(&nfp->rb_node, parent, new); + rb_insert_color(&nfp->rb_node, root); + + return 0; +} + +static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) +{ + struct rb_root *root = &dev->priv.page_root; + struct rb_node *tmp = root->rb_node; + struct page *result = NULL; + struct fw_page *tfp; + + while (tmp) { + tfp = rb_entry(tmp, struct fw_page, rb_node); + if (tfp->addr < addr) { + tmp = tmp->rb_left; + } else if (tfp->addr > addr) { + tmp = tmp->rb_right; + } else { + rb_erase(&tfp->rb_node, root); + result = tfp->page; + kfree(tfp); + break; + } + } + + return result; +} + +static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, + s16 *pages, s16 *init_pages) +{ + struct mlx5_query_pages_inbox in; + struct mlx5_query_pages_outbox out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + if (pages) + *pages = be16_to_cpu(out.num_pages); + if (init_pages) + *init_pages = be16_to_cpu(out.init_pages); + *func_id = be16_to_cpu(out.func_id); + + return err; +} + +static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int notify_fail) +{ + struct mlx5_manage_pages_inbox *in; + struct mlx5_manage_pages_outbox out; + struct page *page; + int inlen; + u64 addr; + int err; + int i; + + inlen = sizeof(*in) + npages * sizeof(in->pas[0]); + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); + return -ENOMEM; + } + memset(&out, 0, sizeof(out)); + + for (i = 0; i < npages; i++) { + page = alloc_page(GFP_HIGHUSER); + if (!page) { + err = -ENOMEM; + mlx5_core_warn(dev, "failed to allocate page\n"); + goto out_alloc; + } + addr = dma_map_page(&dev->pdev->dev, page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, addr)) { + mlx5_core_warn(dev, "failed dma mapping page\n"); + __free_page(page); + err = -ENOMEM; + goto out_alloc; + } + err = insert_page(dev, addr, page, func_id); + if (err) { + mlx5_core_err(dev, "failed to track allocated page\n"); + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + err = -ENOMEM; + goto out_alloc; + } + in->pas[i] = cpu_to_be64(addr); + } + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE); + in->func_id = cpu_to_be16(func_id); + in->num_entries = cpu_to_be16(npages); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + mlx5_core_dbg(dev, "err %d\n", err); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); + goto out_alloc; + } + dev->priv.fw_pages += npages; + + if (out.hdr.status) { + err = mlx5_cmd_status_to_err(&out.hdr); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", func_id, npages, out.hdr.status); + goto out_alloc; + } + } + + mlx5_core_dbg(dev, "err %d\n", err); + + goto out_free; + +out_alloc: + if (notify_fail) { + memset(in, 0, inlen); + memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); + if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out))) + mlx5_core_warn(dev, "\n"); + } + for (i--; i >= 0; i--) { + addr = be64_to_cpu(in->pas[i]); + page = remove_page(dev, addr); + if (!page) { + mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n", + addr); + continue; + } + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + } + +out_free: + mlx5_vfree(in); + return err; +} + +static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, + int *nclaimed) +{ + struct mlx5_manage_pages_inbox in; + struct mlx5_manage_pages_outbox *out; + struct page *page; + int num_claimed; + int outlen; + u64 addr; + int err; + int i; + + memset(&in, 0, sizeof(in)); + outlen = sizeof(*out) + npages * sizeof(out->pas[0]); + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE); + in.func_id = cpu_to_be16(func_id); + in.num_entries = cpu_to_be16(npages); + mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) { + mlx5_core_err(dev, "failed recliaming pages\n"); + goto out_free; + } + dev->priv.fw_pages -= npages; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out_free; + } + + num_claimed = be16_to_cpu(out->num_entries); + if (nclaimed) + *nclaimed = num_claimed; + + for (i = 0; i < num_claimed; i++) { + addr = be64_to_cpu(out->pas[i]); + page = remove_page(dev, addr); + if (!page) { + mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr); + } else { + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + } + } + +out_free: + mlx5_vfree(out); + return err; +} + +static void pages_work_handler(struct work_struct *work) +{ + struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work); + struct mlx5_core_dev *dev = req->dev; + int err = 0; + + if (req->npages < 0) + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + else if (req->npages > 0) + err = give_pages(dev, req->func_id, req->npages, 1); + + if (err) + mlx5_core_warn(dev, "%s fail %d\n", req->npages < 0 ? + "reclaim" : "give", err); + + kfree(req); +} + +void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, + s16 npages) +{ + struct mlx5_pages_req *req; + + req = kzalloc(sizeof(*req), GFP_ATOMIC); + if (!req) { + mlx5_core_warn(dev, "failed to allocate pages request\n"); + return; + } + + req->dev = dev; + req->func_id = func_id; + req->npages = npages; + INIT_WORK(&req->work, pages_work_handler); + queue_work(dev->priv.pg_wq, &req->work); +} + +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev) +{ + s16 uninitialized_var(init_pages); + u16 uninitialized_var(func_id); + int err; + + err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages); + if (err) + return err; + + mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id); + + return give_pages(dev, func_id, init_pages, 0); +} + +static int optimal_reclaimed_pages(void) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_layout *lay; + int ret; + + ret = (sizeof(lay->in) + sizeof(block->data) - + sizeof(struct mlx5_manage_pages_outbox)) / 8; + + return ret; +} + +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(5000); + struct fw_page *fwp; + struct rb_node *p; + int err; + + do { + p = rb_first(&dev->priv.page_root); + if (p) { + fwp = rb_entry(p, struct fw_page, rb_node); + err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL); + if (err) { + mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); + return err; + } + } + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); + break; + } + } while (p); + + return 0; +} + +void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +{ + dev->priv.page_root = RB_ROOT; +} + +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) +{ + /* nothing */ +} + +int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +{ + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; +} + +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) +{ + destroy_workqueue(dev->priv.pg_wq); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c new file mode 100644 index 000000000000..790da5c4ca4f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" + +struct mlx5_alloc_pd_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_pd_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 pdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_pd_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 pdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_pd_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) +{ + struct mlx5_alloc_pd_mbox_in in; + struct mlx5_alloc_pd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + *pdn = be32_to_cpu(out.pdn) & 0xffffff; + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_pd); + +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) +{ + struct mlx5_dealloc_pd_mbox_in in; + struct mlx5_dealloc_pd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD); + in.pdn = cpu_to_be32(pdn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c new file mode 100644 index 000000000000..f6afe7b5a675 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_num, int arg, int write) +{ + struct mlx5_access_reg_mbox_in *in = NULL; + struct mlx5_access_reg_mbox_out *out = NULL; + int err = -ENOMEM; + + in = mlx5_vzalloc(sizeof(*in) + size_in); + if (!in) + return -ENOMEM; + + out = mlx5_vzalloc(sizeof(*out) + size_out); + if (!out) + goto ex1; + + memcpy(in->data, data_in, size_in); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG); + in->hdr.opmod = cpu_to_be16(!write); + in->arg = cpu_to_be32(arg); + in->register_id = cpu_to_be16(reg_num); + err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out, + sizeof(out) + size_out); + if (err) + goto ex2; + + if (out->hdr.status) + err = mlx5_cmd_status_to_err(&out->hdr); + + if (!err) + memcpy(data_out, out->data, size_out); + +ex2: + mlx5_vfree(out); +ex1: + mlx5_vfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_access_reg); + + +struct mlx5_reg_pcap { + u8 rsvd0; + u8 port_num; + u8 rsvd1[2]; + __be32 caps_127_96; + __be32 caps_95_64; + __be32 caps_63_32; + __be32 caps_31_0; +}; + +int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps) +{ + struct mlx5_reg_pcap in; + struct mlx5_reg_pcap out; + int err; + + memset(&in, 0, sizeof(in)); + in.caps_127_96 = cpu_to_be32(caps); + in.port_num = port_num; + + err = mlx5_core_access_reg(dev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_PCAP, 0, 1); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c new file mode 100644 index 000000000000..54faf8bfcaf4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include +#include +#include +#include +#include + +#include "mlx5_core.h" + +void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + struct mlx5_core_qp *qp; + + spin_lock(&table->lock); + + qp = radix_tree_lookup(&table->tree, qpn); + if (qp) + atomic_inc(&qp->refcount); + + spin_unlock(&table->lock); + + if (!qp) { + mlx5_core_warn(dev, "Async event for bogus QP 0x%x\n", qpn); + return; + } + + qp->event(qp, event_type); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + +int mlx5_core_create_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + struct mlx5_create_qp_mbox_in *in, + int inlen) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + struct mlx5_create_qp_mbox_out out; + struct mlx5_destroy_qp_mbox_in din; + struct mlx5_destroy_qp_mbox_out dout; + int err; + + memset(&dout, 0, sizeof(dout)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); + + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) { + mlx5_core_warn(dev, "ret %d", err); + return err; + } + + if (out.hdr.status) { + pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps)); + return mlx5_cmd_status_to_err(&out.hdr); + } + + qp->qpn = be32_to_cpu(out.qpn) & 0xffffff; + mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, qp->qpn, qp); + spin_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "err %d", err); + goto err_cmd; + } + + err = mlx5_debug_qp_add(dev, qp); + if (err) + mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n", + qp->qpn); + + qp->pid = current->pid; + atomic_set(&qp->refcount, 1); + atomic_inc(&dev->num_qps); + init_completion(&qp->free); + + return 0; + +err_cmd: + memset(&din, 0, sizeof(din)); + memset(&dout, 0, sizeof(dout)); + din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); + din.qpn = cpu_to_be32(qp->qpn); + mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout)); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_create_qp); + +int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp) +{ + struct mlx5_destroy_qp_mbox_in in; + struct mlx5_destroy_qp_mbox_out out; + struct mlx5_qp_table *table = &dev->priv.qp_table; + unsigned long flags; + int err; + + mlx5_debug_qp_remove(dev, qp); + + spin_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, qp->qpn); + spin_unlock_irqrestore(&table->lock, flags); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); + wait_for_completion(&qp->free); + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); + in.qpn = cpu_to_be32(qp->qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + atomic_dec(&dev->num_qps); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); + +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, + enum mlx5_qp_state new_state, + struct mlx5_modify_qp_mbox_in *in, int sqd_event, + struct mlx5_core_qp *qp) +{ + static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { + [MLX5_QP_STATE_RST] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP, + }, + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP, + [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP, + [MLX5_QP_STATE_SQD] = MLX5_CMD_OP_RTS2SQD_QP, + }, + [MLX5_QP_STATE_SQD] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD2RTS_QP, + [MLX5_QP_STATE_SQD] = MLX5_CMD_OP_SQD2SQD_QP, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP, + }, + [MLX5_QP_STATE_ERR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + } + }; + + struct mlx5_modify_qp_mbox_out out; + int err = 0; + u16 op; + + if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE || + !optab[cur_state][new_state]) + return -EINVAL; + + memset(&out, 0, sizeof(out)); + op = optab[cur_state][new_state]; + in->hdr.opcode = cpu_to_be16(op); + in->qpn = cpu_to_be32(qp->qpn); + err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + if (err) + return err; + + return mlx5_cmd_status_to_err(&out.hdr); +} +EXPORT_SYMBOL_GPL(mlx5_core_qp_modify); + +void mlx5_init_qp_table(struct mlx5_core_dev *dev) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + mlx5_qp_debugfs_init(dev); +} + +void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) +{ + mlx5_qp_debugfs_cleanup(dev); +} + +int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + struct mlx5_query_qp_mbox_out *out, int outlen) +{ + struct mlx5_query_qp_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, outlen); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP); + in.qpn = cpu_to_be32(qp->qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_qp_query); + +int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) +{ + struct mlx5_alloc_xrcd_mbox_in in; + struct mlx5_alloc_xrcd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + else + *xrcdn = be32_to_cpu(out.xrcdn); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc); + +int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) +{ + struct mlx5_dealloc_xrcd_mbox_in in; + struct mlx5_dealloc_xrcd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD); + in.xrcdn = cpu_to_be32(xrcdn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c new file mode 100644 index 000000000000..38bce93f8314 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" + +void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) { + mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); +} + +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} +EXPORT_SYMBOL(mlx5_core_get_srq); + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen) +{ + struct mlx5_create_srq_mbox_out out; + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_destroy_srq_mbox_in din; + struct mlx5_destroy_srq_mbox_out dout; + int err; + + memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); + goto err_cmd; + } + + return 0; + +err_cmd: + memset(&din, 0, sizeof(din)); + memset(&dout, 0, sizeof(dout)); + din.srqn = cpu_to_be32(srq->srqn); + din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL(mlx5_core_create_srq); + +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_destroy_srq_mbox_in in; + struct mlx5_destroy_srq_mbox_out out; + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp) { + mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); + return -EINVAL; + } + if (tmp != srq) { + mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); + return -EINVAL; + } + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_srq); + +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + struct mlx5_query_srq_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, sizeof(*out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_query_srq); + +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + struct mlx5_arm_srq_mbox_in in; + struct mlx5_arm_srq_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); + in.hdr.opmod = cpu_to_be16(!!is_srq); + in.srqn = cpu_to_be32(srq->srqn); + in.lwm = cpu_to_be16(lwm); + + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_arm_srq); + +void mlx5_init_srq_table(struct mlx5_core_dev *dev) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); +} + +void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) +{ + /* nothing */ +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c new file mode 100644 index 000000000000..71d4a3937200 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" + +enum { + NUM_DRIVER_UARS = 4, + NUM_LOW_LAT_UUARS = 4, +}; + + +struct mlx5_alloc_uar_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_uar_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 uarn; + u8 rsvd[4]; +}; + +struct mlx5_free_uar_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 uarn; + u8 rsvd[4]; +}; + +struct mlx5_free_uar_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) +{ + struct mlx5_alloc_uar_mbox_in in; + struct mlx5_alloc_uar_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + goto ex; + + if (out.hdr.status) { + err = mlx5_cmd_status_to_err(&out.hdr); + goto ex; + } + + *uarn = be32_to_cpu(out.uarn) & 0xffffff; + +ex: + return err; +} +EXPORT_SYMBOL(mlx5_cmd_alloc_uar); + +int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) +{ + struct mlx5_free_uar_mbox_in in; + struct mlx5_free_uar_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR); + in.uarn = cpu_to_be32(uarn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + goto ex; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + +ex: + return err; +} +EXPORT_SYMBOL(mlx5_cmd_free_uar); + +static int need_uuar_lock(int uuarn) +{ + int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; + + if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS) + return 0; + + return 1; +} + +int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +{ + int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; + struct mlx5_bf *bf; + phys_addr_t addr; + int err; + int i; + + uuari->num_uars = NUM_DRIVER_UARS; + uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS; + + mutex_init(&uuari->lock); + uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL); + if (!uuari->uars) + return -ENOMEM; + + uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL); + if (!uuari->bfs) { + err = -ENOMEM; + goto out_uars; + } + + uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap), + GFP_KERNEL); + if (!uuari->bitmap) { + err = -ENOMEM; + goto out_bfs; + } + + uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL); + if (!uuari->count) { + err = -ENOMEM; + goto out_bitmap; + } + + for (i = 0; i < uuari->num_uars; i++) { + err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index); + if (err) + goto out_count; + + addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT); + uuari->uars[i].map = ioremap(addr, PAGE_SIZE); + if (!uuari->uars[i].map) { + mlx5_cmd_free_uar(dev, uuari->uars[i].index); + goto out_count; + } + mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n", + uuari->uars[i].index, uuari->uars[i].map); + } + + for (i = 0; i < tot_uuars; i++) { + bf = &uuari->bfs[i]; + + bf->buf_size = dev->caps.bf_reg_size / 2; + bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; + bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; + bf->reg = NULL; /* Add WC support */ + bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.bf_reg_size + + MLX5_BF_OFFSET; + bf->need_lock = need_uuar_lock(i); + spin_lock_init(&bf->lock); + spin_lock_init(&bf->lock32); + bf->uuarn = i; + } + + return 0; + +out_count: + for (i--; i >= 0; i--) { + iounmap(uuari->uars[i].map); + mlx5_cmd_free_uar(dev, uuari->uars[i].index); + } + kfree(uuari->count); + +out_bitmap: + kfree(uuari->bitmap); + +out_bfs: + kfree(uuari->bfs); + +out_uars: + kfree(uuari->uars); + return err; +} + +int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +{ + int i = uuari->num_uars; + + for (i--; i >= 0; i--) { + iounmap(uuari->uars[i].map); + mlx5_cmd_free_uar(dev, uuari->uars[i].index); + } + + kfree(uuari->count); + kfree(uuari->bitmap); + kfree(uuari->bfs); + kfree(uuari->uars); + + return 0; +} diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h new file mode 100644 index 000000000000..2826a4b6071e --- /dev/null +++ b/include/linux/mlx5/cmd.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_CMD_H +#define MLX5_CMD_H + +#include + +struct manage_pages_layout { + u64 ptr; + u32 reserved; + u16 num_entries; + u16 func_id; +}; + + +struct mlx5_cmd_alloc_uar_imm_out { + u32 rsvd[3]; + u32 uarn; +}; + +#endif /* MLX5_CMD_H */ diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h new file mode 100644 index 000000000000..3db67f73d96d --- /dev/null +++ b/include/linux/mlx5/cq.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_CORE_CQ_H +#define MLX5_CORE_CQ_H + +#include +#include + + +struct mlx5_core_cq { + u32 cqn; + int cqe_sz; + __be32 *set_ci_db; + __be32 *arm_db; + atomic_t refcount; + struct completion free; + unsigned vector; + int irqn; + void (*comp) (struct mlx5_core_cq *); + void (*event) (struct mlx5_core_cq *, enum mlx5_event); + struct mlx5_uar *uar; + u32 cons_index; + unsigned arm_sn; + struct mlx5_rsc_debug *dbg; + int pid; +}; + + +enum { + MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX5_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX5_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX5_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX5_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX5_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +enum { + MLX5_CQE_OWNER_MASK = 1, + MLX5_CQE_REQ = 0, + MLX5_CQE_RESP_WR_IMM = 1, + MLX5_CQE_RESP_SEND = 2, + MLX5_CQE_RESP_SEND_IMM = 3, + MLX5_CQE_RESP_SEND_INV = 4, + MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */ + MLX5_CQE_REQ_ERR = 13, + MLX5_CQE_RESP_ERR = 14, +}; + +enum { + MLX5_CQ_MODIFY_RESEIZE = 0, + MLX5_CQ_MODIFY_MODER = 1, + MLX5_CQ_MODIFY_MAPPING = 2, +}; + +struct mlx5_cq_modify_params { + int type; + union { + struct { + u32 page_offset; + u8 log_cq_size; + } resize; + + struct { + } moder; + + struct { + } mapping; + } params; +}; + +enum { + CQE_SIZE_64 = 0, + CQE_SIZE_128 = 1, +}; + +static inline int cqe_sz_to_mlx_sz(u8 size) +{ + return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; +} + +static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) +{ + *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); +} + +enum { + MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24, + MLX5_CQ_DB_REQ_NOT = 0 << 24 +}; + +static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, + void __iomem *uar_page, + spinlock_t *doorbell_lock) +{ + __be32 doorbell[2]; + u32 sn; + u32 ci; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + + *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); + + /* Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci); + doorbell[1] = cpu_to_be32(cq->cqn); + + mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock); +} + +int mlx5_init_cq_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_create_cq_mbox_in *in, int inlen); +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_query_cq_mbox_out *out); +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int type, struct mlx5_cq_modify_params *params); +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); + +#endif /* MLX5_CORE_CQ_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h new file mode 100644 index 000000000000..51390915e538 --- /dev/null +++ b/include/linux/mlx5/device.h @@ -0,0 +1,893 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DEVICE_H +#define MLX5_DEVICE_H + +#include +#include + +#if defined(__LITTLE_ENDIAN) +#define MLX5_SET_HOST_ENDIANNESS 0 +#elif defined(__BIG_ENDIAN) +#define MLX5_SET_HOST_ENDIANNESS 0x80 +#else +#error Host endianness not defined +#endif + +enum { + MLX5_MAX_COMMANDS = 32, + MLX5_CMD_DATA_BLOCK_SIZE = 512, + MLX5_PCI_CMD_XPORT = 7, +}; + +enum { + MLX5_EXTENDED_UD_AV = 0x80000000, +}; + +enum { + MLX5_CQ_STATE_ARMED = 9, + MLX5_CQ_STATE_ALWAYS_ARMED = 0xb, + MLX5_CQ_STATE_FIRED = 0xa, +}; + +enum { + MLX5_STAT_RATE_OFFSET = 5, +}; + +enum { + MLX5_INLINE_SEG = 0x80000000, +}; + +enum { + MLX5_PERM_LOCAL_READ = 1 << 2, + MLX5_PERM_LOCAL_WRITE = 1 << 3, + MLX5_PERM_REMOTE_READ = 1 << 4, + MLX5_PERM_REMOTE_WRITE = 1 << 5, + MLX5_PERM_ATOMIC = 1 << 6, + MLX5_PERM_UMR_EN = 1 << 7, +}; + +enum { + MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0, + MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2, + MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3, + MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6, + MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, +}; + +enum { + MLX5_ACCESS_MODE_PA = 0, + MLX5_ACCESS_MODE_MTT = 1, + MLX5_ACCESS_MODE_KLM = 2 +}; + +enum { + MLX5_MKEY_REMOTE_INVAL = 1 << 24, + MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, + MLX5_MKEY_BSF_EN = 1 << 30, + MLX5_MKEY_LEN64 = 1 << 31, +}; + +enum { + MLX5_EN_RD = (u64)1, + MLX5_EN_WR = (u64)2 +}; + +enum { + MLX5_BF_REGS_PER_PAGE = 4, + MLX5_MAX_UAR_PAGES = 1 << 8, + MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE, +}; + +enum { + MLX5_MKEY_MASK_LEN = 1ull << 0, + MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1, + MLX5_MKEY_MASK_START_ADDR = 1ull << 6, + MLX5_MKEY_MASK_PD = 1ull << 7, + MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, + MLX5_MKEY_MASK_BSF_EN = 1ull << 12, + MLX5_MKEY_MASK_KEY = 1ull << 13, + MLX5_MKEY_MASK_QPN = 1ull << 14, + MLX5_MKEY_MASK_LR = 1ull << 17, + MLX5_MKEY_MASK_LW = 1ull << 18, + MLX5_MKEY_MASK_RR = 1ull << 19, + MLX5_MKEY_MASK_RW = 1ull << 20, + MLX5_MKEY_MASK_A = 1ull << 21, + MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, + MLX5_MKEY_MASK_FREE = 1ull << 29, +}; + +enum mlx5_event { + MLX5_EVENT_TYPE_COMP = 0x0, + + MLX5_EVENT_TYPE_PATH_MIG = 0x01, + MLX5_EVENT_TYPE_COMM_EST = 0x02, + MLX5_EVENT_TYPE_SQ_DRAINED = 0x03, + MLX5_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MLX5_EVENT_TYPE_SRQ_RQ_LIMIT = 0x14, + + MLX5_EVENT_TYPE_CQ_ERROR = 0x04, + MLX5_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MLX5_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + + MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08, + MLX5_EVENT_TYPE_PORT_CHANGE = 0x09, + MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, + + MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, + MLX5_EVENT_TYPE_STALL_EVENT = 0x1b, + + MLX5_EVENT_TYPE_CMD = 0x0a, + MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, +}; + +enum { + MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, + MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, + MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5, + MLX5_PORT_CHANGE_SUBTYPE_LID = 6, + MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7, + MLX5_PORT_CHANGE_SUBTYPE_GUID = 8, + MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, +}; + +enum { + MLX5_DEV_CAP_FLAG_RC = 1LL << 0, + MLX5_DEV_CAP_FLAG_UC = 1LL << 1, + MLX5_DEV_CAP_FLAG_UD = 1LL << 2, + MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, + MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6, + MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, + MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, + MLX5_DEV_CAP_FLAG_APM = 1LL << 17, + MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, + MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, + MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, + MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, + MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, + MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, + MLX5_DEV_CAP_FLAG_DCT = 1LL << 41, + MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46, +}; + +enum { + MLX5_OPCODE_NOP = 0x00, + MLX5_OPCODE_SEND_INVAL = 0x01, + MLX5_OPCODE_RDMA_WRITE = 0x08, + MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX5_OPCODE_SEND = 0x0a, + MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_RDMA_READ = 0x10, + MLX5_OPCODE_ATOMIC_CS = 0x11, + MLX5_OPCODE_ATOMIC_FA = 0x12, + MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, + MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, + MLX5_OPCODE_BIND_MW = 0x18, + MLX5_OPCODE_CONFIG_CMD = 0x1f, + + MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, + MLX5_RECV_OPCODE_SEND = 0x01, + MLX5_RECV_OPCODE_SEND_IMM = 0x02, + MLX5_RECV_OPCODE_SEND_INVAL = 0x03, + + MLX5_CQE_OPCODE_ERROR = 0x1e, + MLX5_CQE_OPCODE_RESIZE = 0x16, + + MLX5_OPCODE_SET_PSV = 0x20, + MLX5_OPCODE_GET_PSV = 0x21, + MLX5_OPCODE_CHECK_PSV = 0x22, + MLX5_OPCODE_RGET_PSV = 0x26, + MLX5_OPCODE_RCHECK_PSV = 0x27, + + MLX5_OPCODE_UMR = 0x25, + +}; + +enum { + MLX5_SET_PORT_RESET_QKEY = 0, + MLX5_SET_PORT_GUID0 = 16, + MLX5_SET_PORT_NODE_GUID = 17, + MLX5_SET_PORT_SYS_GUID = 18, + MLX5_SET_PORT_GID_TABLE = 19, + MLX5_SET_PORT_PKEY_TABLE = 20, +}; + +enum { + MLX5_MAX_PAGE_SHIFT = 31 +}; + +struct mlx5_inbox_hdr { + __be16 opcode; + u8 rsvd[4]; + __be16 opmod; +}; + +struct mlx5_outbox_hdr { + u8 status; + u8 rsvd[3]; + __be32 syndrome; +}; + +struct mlx5_cmd_query_adapter_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_query_adapter_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[24]; + u8 intapin; + u8 rsvd1[13]; + __be16 vsd_vendor_id; + u8 vsd[208]; + u8 vsd_psid[16]; +}; + +struct mlx5_hca_cap { + u8 rsvd1[16]; + u8 log_max_srq_sz; + u8 log_max_qp_sz; + u8 rsvd2; + u8 log_max_qp; + u8 log_max_strq_sz; + u8 log_max_srqs; + u8 rsvd4[2]; + u8 rsvd5; + u8 log_max_cq_sz; + u8 rsvd6; + u8 log_max_cq; + u8 log_max_eq_sz; + u8 log_max_mkey; + u8 rsvd7; + u8 log_max_eq; + u8 max_indirection; + u8 log_max_mrw_sz; + u8 log_max_bsf_list_sz; + u8 log_max_klm_list_sz; + u8 rsvd_8_0; + u8 log_max_ra_req_dc; + u8 rsvd_8_1; + u8 log_max_ra_res_dc; + u8 rsvd9; + u8 log_max_ra_req_qp; + u8 rsvd10; + u8 log_max_ra_res_qp; + u8 rsvd11[4]; + __be16 max_qp_count; + __be16 rsvd12; + u8 rsvd13; + u8 local_ca_ack_delay; + u8 rsvd14; + u8 num_ports; + u8 log_max_msg; + u8 rsvd15[3]; + __be16 stat_rate_support; + u8 rsvd16[2]; + __be64 flags; + u8 rsvd17; + u8 uar_sz; + u8 rsvd18; + u8 log_pg_sz; + __be16 bf_log_bf_reg_size; + u8 rsvd19[4]; + __be16 max_desc_sz_sq; + u8 rsvd20[2]; + __be16 max_desc_sz_rq; + u8 rsvd21[2]; + __be16 max_desc_sz_sq_dc; + u8 rsvd22[4]; + __be16 max_qp_mcg; + u8 rsvd23; + u8 log_max_mcg; + u8 rsvd24; + u8 log_max_pd; + u8 rsvd25; + u8 log_max_xrcd; + u8 rsvd26[40]; + __be32 uar_page_sz; + u8 rsvd27[28]; + u8 log_msx_atomic_size_qp; + u8 rsvd28[2]; + u8 log_msx_atomic_size_dc; + u8 rsvd29[76]; +}; + + +struct mlx5_cmd_query_hca_cap_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + + +struct mlx5_cmd_query_hca_cap_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_hca_cap hca_cap; +}; + + +struct mlx5_cmd_set_hca_cap_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; + struct mlx5_hca_cap hca_cap; +}; + + +struct mlx5_cmd_set_hca_cap_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + + +struct mlx5_cmd_init_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 profile; + u8 rsvd1[4]; +}; + +struct mlx5_cmd_init_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_teardown_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 profile; + u8 rsvd1[4]; +}; + +struct mlx5_cmd_teardown_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_layout { + u8 type; + u8 rsvd0[3]; + __be32 inlen; + __be64 in_ptr; + __be32 in[4]; + __be32 out[4]; + __be64 out_ptr; + __be32 outlen; + u8 token; + u8 sig; + u8 rsvd1; + u8 status_own; +}; + + +struct health_buffer { + __be32 assert_var[5]; + __be32 rsvd0[3]; + __be32 assert_exit_ptr; + __be32 assert_callra; + __be32 rsvd1[2]; + __be32 fw_ver; + __be32 hw_id; + __be32 rsvd2; + u8 irisc_index; + u8 synd; + __be16 ext_sync; +}; + +struct mlx5_init_seg { + __be32 fw_rev; + __be32 cmdif_rev_fw_sub; + __be32 rsvd0[2]; + __be32 cmdq_addr_h; + __be32 cmdq_addr_l_sz; + __be32 cmd_dbell; + __be32 rsvd1[121]; + struct health_buffer health; + __be32 rsvd2[884]; + __be32 health_counter; + __be32 rsvd3[1023]; + __be64 ieee1588_clk; + __be32 ieee1588_clk_type; + __be32 clr_intx; +}; + +struct mlx5_eqe_comp { + __be32 reserved[6]; + __be32 cqn; +}; + +struct mlx5_eqe_qp_srq { + __be32 reserved[6]; + __be32 qp_srq_n; +}; + +struct mlx5_eqe_cq_err { + __be32 cqn; + u8 reserved1[7]; + u8 syndrome; +}; + +struct mlx5_eqe_dropped_packet { +}; + +struct mlx5_eqe_port_state { + u8 reserved0[8]; + u8 port; +}; + +struct mlx5_eqe_gpio { + __be32 reserved0[2]; + __be64 gpio_event; +}; + +struct mlx5_eqe_congestion { + u8 type; + u8 rsvd0; + u8 congestion_level; +}; + +struct mlx5_eqe_stall_vl { + u8 rsvd0[3]; + u8 port_vl; +}; + +struct mlx5_eqe_cmd { + __be32 vector; + __be32 rsvd[6]; +}; + +struct mlx5_eqe_page_req { + u8 rsvd0[2]; + __be16 func_id; + u8 rsvd1[2]; + __be16 num_pages; + __be32 rsvd2[5]; +}; + +union ev_data { + __be32 raw[7]; + struct mlx5_eqe_cmd cmd; + struct mlx5_eqe_comp comp; + struct mlx5_eqe_qp_srq qp_srq; + struct mlx5_eqe_cq_err cq_err; + struct mlx5_eqe_dropped_packet dp; + struct mlx5_eqe_port_state port; + struct mlx5_eqe_gpio gpio; + struct mlx5_eqe_congestion cong; + struct mlx5_eqe_stall_vl stall_vl; + struct mlx5_eqe_page_req req_pages; +} __packed; + +struct mlx5_eqe { + u8 rsvd0; + u8 type; + u8 rsvd1; + u8 sub_type; + __be32 rsvd2[7]; + union ev_data data; + __be16 rsvd3; + u8 signature; + u8 owner; +} __packed; + +struct mlx5_cmd_prot_block { + u8 data[MLX5_CMD_DATA_BLOCK_SIZE]; + u8 rsvd0[48]; + __be64 next; + __be32 block_num; + u8 rsvd1; + u8 token; + u8 ctrl_sig; + u8 sig; +}; + +struct mlx5_err_cqe { + u8 rsvd0[32]; + __be32 srqn; + u8 rsvd1[18]; + u8 vendor_err_synd; + u8 syndrome; + __be32 s_wqe_opcode_qpn; + __be16 wqe_counter; + u8 signature; + u8 op_own; +}; + +struct mlx5_cqe64 { + u8 rsvd0[17]; + u8 ml_path; + u8 rsvd20[4]; + __be16 slid; + __be32 flags_rqpn; + u8 rsvd28[4]; + __be32 srqn; + __be32 imm_inval_pkey; + u8 rsvd40[4]; + __be32 byte_cnt; + __be64 timestamp; + __be32 sop_drop_qpn; + __be16 wqe_counter; + u8 signature; + u8 op_own; +}; + +struct mlx5_wqe_srq_next_seg { + u8 rsvd0[2]; + __be16 next_wqe_index; + u8 signature; + u8 rsvd1[11]; +}; + +union mlx5_ext_cqe { + struct ib_grh grh; + u8 inl[64]; +}; + +struct mlx5_cqe128 { + union mlx5_ext_cqe inl_grh; + struct mlx5_cqe64 cqe64; +}; + +struct mlx5_srq_ctx { + u8 state_log_sz; + u8 rsvd0[3]; + __be32 flags_xrcd; + __be32 pgoff_cqn; + u8 rsvd1[4]; + u8 log_pg_sz; + u8 rsvd2[7]; + __be32 pd; + __be16 lwm; + __be16 wqe_cnt; + u8 rsvd3[8]; + __be64 db_record; +}; + +struct mlx5_create_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_srqn; + u8 rsvd0[4]; + struct mlx5_srq_ctx ctx; + u8 rsvd1[208]; + __be64 pas[0]; +}; + +struct mlx5_create_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 srqn; + u8 rsvd[4]; +}; + +struct mlx5_destroy_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + u8 rsvd[4]; +}; + +struct mlx5_destroy_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + u8 rsvd0[4]; +}; + +struct mlx5_query_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_srq_ctx ctx; + u8 rsvd1[32]; + __be64 pas[0]; +}; + +struct mlx5_arm_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + __be16 rsvd; + __be16 lwm; +}; + +struct mlx5_arm_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cq_context { + u8 status; + u8 cqe_sz_flags; + u8 st; + u8 rsvd3; + u8 rsvd4[6]; + __be16 page_offset; + __be32 log_sz_usr_page; + __be16 cq_period; + __be16 cq_max_count; + __be16 rsvd20; + __be16 c_eqn; + u8 log_pg_sz; + u8 rsvd25[7]; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_counter; + __be32 producer_counter; + u8 rsvd48[8]; + __be64 db_record_addr; +}; + +struct mlx5_create_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_cqn; + u8 rsvdx[4]; + struct mlx5_cq_context ctx; + u8 rsvd6[192]; + __be64 pas[0]; +}; + +struct mlx5_create_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_query_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_query_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_cq_context ctx; + u8 rsvd6[16]; + __be64 pas[0]; +}; + +struct mlx5_eq_context { + u8 status; + u8 ec_oi; + u8 st; + u8 rsvd2[7]; + __be16 page_pffset; + __be32 log_sz_usr_page; + u8 rsvd3[7]; + u8 intr; + u8 log_page_size; + u8 rsvd4[15]; + __be32 consumer_counter; + __be32 produser_counter; + u8 rsvd5[16]; +}; + +struct mlx5_create_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 input_eqn; + u8 rsvd1[4]; + struct mlx5_eq_context ctx; + u8 rsvd2[8]; + __be64 events_mask; + u8 rsvd3[176]; + __be64 pas[0]; +}; + +struct mlx5_create_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[3]; + u8 eq_number; + u8 rsvd1[4]; +}; + +struct mlx5_destroy_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 eqn; + u8 rsvd1[4]; +}; + +struct mlx5_destroy_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_map_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be64 mask; + u8 mu; + u8 rsvd0[2]; + u8 eqn; + u8 rsvd1[24]; +}; + +struct mlx5_map_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 eqn; + u8 rsvd1[4]; +}; + +struct mlx5_query_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + struct mlx5_eq_context ctx; +}; + +struct mlx5_mkey_seg { + /* This is a two bit field occupying bits 31-30. + * bit 31 is always 0, + * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation + */ + u8 status; + u8 pcie_control; + u8 flags; + u8 version; + __be32 qpn_mkey7_0; + u8 rsvd1[4]; + __be32 flags_pd; + __be64 start_addr; + __be64 len; + __be32 bsfs_octo_size; + u8 rsvd2[16]; + __be32 xlt_oct_size; + u8 rsvd3[3]; + u8 log2_page_size; + u8 rsvd4[4]; +}; + +struct mlx5_query_special_ctxs_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_special_ctxs_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 dump_fill_mkey; + __be32 reserved_lkey; +}; + +struct mlx5_create_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_mkey_index; + u8 rsvd0[4]; + struct mlx5_mkey_seg seg; + u8 rsvd1[16]; + __be32 xlat_oct_act_size; + __be32 bsf_coto_act_size; + u8 rsvd2[168]; + __be64 pas[0]; +}; + +struct mlx5_create_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 mkey; + u8 rsvd[4]; +}; + +struct mlx5_destroy_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; + u8 rsvd[4]; +}; + +struct mlx5_destroy_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; +}; + +struct mlx5_query_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be64 pas[0]; +}; + +struct mlx5_modify_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; + __be64 pas[0]; +}; + +struct mlx5_modify_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; +}; + +struct mlx5_dump_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; +}; + +struct mlx5_dump_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 mkey; +}; + +struct mlx5_mad_ifc_mbox_in { + struct mlx5_inbox_hdr hdr; + __be16 remote_lid; + u8 rsvd0; + u8 port; + u8 rsvd1[4]; + u8 data[256]; +}; + +struct mlx5_mad_ifc_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + u8 data[256]; +}; + +struct mlx5_access_reg_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 register_id; + __be32 arg; + __be32 data[0]; +}; + +struct mlx5_access_reg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + __be32 data[0]; +}; + +#define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) + +enum { + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 +}; + +#endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h new file mode 100644 index 000000000000..163a818411e7 --- /dev/null +++ b/include/linux/mlx5/doorbell.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DOORBELL_H +#define MLX5_DOORBELL_H + +#define MLX5_BF_OFFSET 0x800 +#define MLX5_CQ_DOORBELL 0x20 + +#if BITS_PER_LONG == 64 +/* Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MLX5_DECLARE_DOORBELL_LOCK(name) +#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mlx5_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq(*(u64 *)val, dest); +} + +#else + +/* Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ + +#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mlx5_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + spin_lock_irqsave(doorbell_lock, flags); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +#endif + +#endif /* MLX5_DOORBELL_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h new file mode 100644 index 000000000000..e47f1e4c9b03 --- /dev/null +++ b/include/linux/mlx5/driver.h @@ -0,0 +1,769 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DRIVER_H +#define MLX5_DRIVER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + MLX5_BOARD_ID_LEN = 64, + MLX5_MAX_NAME_LEN = 16, +}; + +enum { + /* one minute for the sake of bringup. Generally, commands must always + * complete and we may need to increase this timeout value + */ + MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000, + MLX5_CMD_WQ_MAX_NAME = 32, +}; + +enum { + CMD_OWNER_SW = 0x0, + CMD_OWNER_HW = 0x1, + CMD_STATUS_SUCCESS = 0, +}; + +enum mlx5_sqp_t { + MLX5_SQP_SMI = 0, + MLX5_SQP_GSI = 1, + MLX5_SQP_IEEE_1588 = 2, + MLX5_SQP_SNIFFER = 3, + MLX5_SQP_SYNC_UMR = 4, +}; + +enum { + MLX5_MAX_PORTS = 2, +}; + +enum { + MLX5_EQ_VEC_PAGES = 0, + MLX5_EQ_VEC_CMD = 1, + MLX5_EQ_VEC_ASYNC = 2, + MLX5_EQ_VEC_COMP_BASE, +}; + +enum { + MLX5_MAX_EQ_NAME = 20 +}; + +enum { + MLX5_ATOMIC_MODE_IB_COMP = 1 << 16, + MLX5_ATOMIC_MODE_CX = 2 << 16, + MLX5_ATOMIC_MODE_8B = 3 << 16, + MLX5_ATOMIC_MODE_16B = 4 << 16, + MLX5_ATOMIC_MODE_32B = 5 << 16, + MLX5_ATOMIC_MODE_64B = 6 << 16, + MLX5_ATOMIC_MODE_128B = 7 << 16, + MLX5_ATOMIC_MODE_256B = 8 << 16, +}; + +enum { + MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, + MLX5_CMD_OP_QUERY_ADAPTER = 0x101, + MLX5_CMD_OP_INIT_HCA = 0x102, + MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_QUERY_PAGES = 0x107, + MLX5_CMD_OP_MANAGE_PAGES = 0x108, + MLX5_CMD_OP_SET_HCA_CAP = 0x109, + + MLX5_CMD_OP_CREATE_MKEY = 0x200, + MLX5_CMD_OP_QUERY_MKEY = 0x201, + MLX5_CMD_OP_DESTROY_MKEY = 0x202, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, + + MLX5_CMD_OP_CREATE_EQ = 0x301, + MLX5_CMD_OP_DESTROY_EQ = 0x302, + MLX5_CMD_OP_QUERY_EQ = 0x303, + + MLX5_CMD_OP_CREATE_CQ = 0x400, + MLX5_CMD_OP_DESTROY_CQ = 0x401, + MLX5_CMD_OP_QUERY_CQ = 0x402, + MLX5_CMD_OP_MODIFY_CQ = 0x403, + + MLX5_CMD_OP_CREATE_QP = 0x500, + MLX5_CMD_OP_DESTROY_QP = 0x501, + MLX5_CMD_OP_RST2INIT_QP = 0x502, + MLX5_CMD_OP_INIT2RTR_QP = 0x503, + MLX5_CMD_OP_RTR2RTS_QP = 0x504, + MLX5_CMD_OP_RTS2RTS_QP = 0x505, + MLX5_CMD_OP_SQERR2RTS_QP = 0x506, + MLX5_CMD_OP_2ERR_QP = 0x507, + MLX5_CMD_OP_RTS2SQD_QP = 0x508, + MLX5_CMD_OP_SQD2RTS_QP = 0x509, + MLX5_CMD_OP_2RST_QP = 0x50a, + MLX5_CMD_OP_QUERY_QP = 0x50b, + MLX5_CMD_OP_CONF_SQP = 0x50c, + MLX5_CMD_OP_MAD_IFC = 0x50d, + MLX5_CMD_OP_INIT2INIT_QP = 0x50e, + MLX5_CMD_OP_SUSPEND_QP = 0x50f, + MLX5_CMD_OP_UNSUSPEND_QP = 0x510, + MLX5_CMD_OP_SQD2SQD_QP = 0x511, + MLX5_CMD_OP_ALLOC_QP_COUNTER_SET = 0x512, + MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET = 0x513, + MLX5_CMD_OP_QUERY_QP_COUNTER_SET = 0x514, + + MLX5_CMD_OP_CREATE_PSV = 0x600, + MLX5_CMD_OP_DESTROY_PSV = 0x601, + MLX5_CMD_OP_QUERY_PSV = 0x602, + MLX5_CMD_OP_QUERY_SIG_RULE_TABLE = 0x603, + MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE = 0x604, + + MLX5_CMD_OP_CREATE_SRQ = 0x700, + MLX5_CMD_OP_DESTROY_SRQ = 0x701, + MLX5_CMD_OP_QUERY_SRQ = 0x702, + MLX5_CMD_OP_ARM_RQ = 0x703, + MLX5_CMD_OP_RESIZE_SRQ = 0x704, + + MLX5_CMD_OP_ALLOC_PD = 0x800, + MLX5_CMD_OP_DEALLOC_PD = 0x801, + MLX5_CMD_OP_ALLOC_UAR = 0x802, + MLX5_CMD_OP_DEALLOC_UAR = 0x803, + + MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, + MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, + + + MLX5_CMD_OP_ALLOC_XRCD = 0x80e, + MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, + + MLX5_CMD_OP_ACCESS_REG = 0x805, + MLX5_CMD_OP_MAX = 0x810, +}; + +enum { + MLX5_REG_PCAP = 0x5001, + MLX5_REG_PMTU = 0x5003, + MLX5_REG_PTYS = 0x5004, + MLX5_REG_PAOS = 0x5006, + MLX5_REG_PMAOS = 0x5012, + MLX5_REG_PUDE = 0x5009, + MLX5_REG_PMPE = 0x5010, + MLX5_REG_PELC = 0x500e, + MLX5_REG_PMLP = 0, /* TBD */ + MLX5_REG_NODE_DESC = 0x6001, + MLX5_REG_HOST_ENDIANNESS = 0x7004, +}; + +enum dbg_rsc_type { + MLX5_DBG_RSC_QP, + MLX5_DBG_RSC_EQ, + MLX5_DBG_RSC_CQ, +}; + +struct mlx5_field_desc { + struct dentry *dent; + int i; +}; + +struct mlx5_rsc_debug { + struct mlx5_core_dev *dev; + void *object; + enum dbg_rsc_type type; + struct dentry *root; + struct mlx5_field_desc fields[0]; +}; + +enum mlx5_dev_event { + MLX5_DEV_EVENT_SYS_ERROR, + MLX5_DEV_EVENT_PORT_UP, + MLX5_DEV_EVENT_PORT_DOWN, + MLX5_DEV_EVENT_PORT_INITIALIZED, + MLX5_DEV_EVENT_LID_CHANGE, + MLX5_DEV_EVENT_PKEY_CHANGE, + MLX5_DEV_EVENT_GUID_CHANGE, + MLX5_DEV_EVENT_CLIENT_REREG, +}; + +struct mlx5_uuar_info { + struct mlx5_uar *uars; + int num_uars; + int num_low_latency_uuars; + unsigned long *bitmap; + unsigned int *count; + struct mlx5_bf *bfs; + + /* + * protect uuar allocation data structs + */ + struct mutex lock; +}; + +struct mlx5_bf { + void __iomem *reg; + void __iomem *regreg; + int buf_size; + struct mlx5_uar *uar; + unsigned long offset; + int need_lock; + /* protect blue flame buffer selection when needed + */ + spinlock_t lock; + + /* serialize 64 bit writes when done as two 32 bit accesses + */ + spinlock_t lock32; + int uuarn; +}; + +struct mlx5_cmd_first { + __be32 data[4]; +}; + +struct mlx5_cmd_msg { + struct list_head list; + struct cache_ent *cache; + u32 len; + struct mlx5_cmd_first first; + struct mlx5_cmd_mailbox *next; +}; + +struct mlx5_cmd_debug { + struct dentry *dbg_root; + struct dentry *dbg_in; + struct dentry *dbg_out; + struct dentry *dbg_outlen; + struct dentry *dbg_status; + struct dentry *dbg_run; + void *in_msg; + void *out_msg; + u8 status; + u16 inlen; + u16 outlen; +}; + +struct cache_ent { + /* protect block chain allocations + */ + spinlock_t lock; + struct list_head head; +}; + +struct cmd_msg_cache { + struct cache_ent large; + struct cache_ent med; + +}; + +struct mlx5_cmd_stats { + u64 sum; + u64 n; + struct dentry *root; + struct dentry *avg; + struct dentry *count; + /* protect command average calculations */ + spinlock_t lock; +}; + +struct mlx5_cmd { + void *cmd_buf; + dma_addr_t dma; + u16 cmdif_rev; + u8 log_sz; + u8 log_stride; + int max_reg_cmds; + int events; + u32 __iomem *vector; + + /* protect command queue allocations + */ + spinlock_t alloc_lock; + + /* protect token allocations + */ + spinlock_t token_lock; + u8 token; + unsigned long bitmask; + char wq_name[MLX5_CMD_WQ_MAX_NAME]; + struct workqueue_struct *wq; + struct semaphore sem; + struct semaphore pages_sem; + int mode; + struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; + struct pci_pool *pool; + struct mlx5_cmd_debug dbg; + struct cmd_msg_cache cache; + int checksum_disabled; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; +}; + +struct mlx5_port_caps { + int gid_table_len; + int pkey_table_len; +}; + +struct mlx5_caps { + u8 log_max_eq; + u8 log_max_cq; + u8 log_max_qp; + u8 log_max_mkey; + u8 log_max_pd; + u8 log_max_srq; + u32 max_cqes; + int max_wqes; + int max_sq_desc_sz; + int max_rq_desc_sz; + u64 flags; + u16 stat_rate_support; + int log_max_msg; + int num_ports; + int max_ra_res_qp; + int max_ra_req_qp; + int max_srq_wqes; + int bf_reg_size; + int bf_regs_per_page; + struct mlx5_port_caps port[MLX5_MAX_PORTS]; + u8 ext_port_cap[MLX5_MAX_PORTS]; + int max_vf; + u32 reserved_lkey; + u8 local_ca_ack_delay; + u8 log_max_mcg; + u16 max_qp_mcg; + int min_page_sz; +}; + +struct mlx5_cmd_mailbox { + void *buf; + dma_addr_t dma; + struct mlx5_cmd_mailbox *next; +}; + +struct mlx5_buf_list { + void *buf; + dma_addr_t map; +}; + +struct mlx5_buf { + struct mlx5_buf_list direct; + struct mlx5_buf_list *page_list; + int nbufs; + int npages; + int page_shift; + int size; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_buf buf; + int size; + u8 irqn; + u8 eqn; + int nent; + u64 mask; + char name[MLX5_MAX_EQ_NAME]; + struct list_head list; + int index; + struct mlx5_rsc_debug *dbg; +}; + + +struct mlx5_core_mr { + u64 iova; + u64 size; + u32 key; + u32 pd; + u32 access; +}; + +struct mlx5_core_srq { + u32 srqn; + int max; + int max_gs; + int max_avail_gather; + int wqe_shift; + void (*event) (struct mlx5_core_srq *, enum mlx5_event); + + atomic_t refcount; + struct completion free; +}; + +struct mlx5_eq_table { + void __iomem *update_ci; + void __iomem *update_arm_ci; + struct list_head *comp_eq_head; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; + struct msix_entry *msix_arr; + int num_comp_vectors; + /* protect EQs list + */ + spinlock_t lock; +}; + +struct mlx5_uar { + u32 index; + struct list_head bf_list; + unsigned free_bf_bmap; + void __iomem *wc_map; + void __iomem *map; +}; + + +struct mlx5_core_health { + struct health_buffer __iomem *health; + __be32 __iomem *health_counter; + struct timer_list timer; + struct list_head list; + u32 prev; + int miss_counter; +}; + +struct mlx5_cq_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_qp_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_srq_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_priv { + char name[MLX5_MAX_NAME_LEN]; + struct mlx5_eq_table eq_table; + struct mlx5_uuar_info uuari; + MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); + + /* pages stuff */ + struct workqueue_struct *pg_wq; + struct rb_root page_root; + int fw_pages; + int reg_pages; + + struct mlx5_core_health health; + + struct mlx5_srq_table srq_table; + + /* start: qp staff */ + struct mlx5_qp_table qp_table; + struct dentry *qp_debugfs; + struct dentry *eq_debugfs; + struct dentry *cq_debugfs; + struct dentry *cmdif_debugfs; + /* end: qp staff */ + + /* start: cq staff */ + struct mlx5_cq_table cq_table; + /* end: cq staff */ + + /* start: alloc staff */ + struct mutex pgdir_mutex; + struct list_head pgdir_list; + /* end: alloc staff */ + struct dentry *dbg_root; + + /* protect mkey key part */ + spinlock_t mkey_lock; + u8 mkey_key; +}; + +struct mlx5_core_dev { + struct pci_dev *pdev; + u8 rev_id; + char board_id[MLX5_BOARD_ID_LEN]; + struct mlx5_cmd cmd; + struct mlx5_caps caps; + phys_addr_t iseg_base; + struct mlx5_init_seg __iomem *iseg; + void (*event) (struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + void *data); + struct mlx5_priv priv; + struct mlx5_profile *profile; + atomic_t num_qps; +}; + +struct mlx5_db { + __be32 *db; + union { + struct mlx5_db_pgdir *pgdir; + struct mlx5_ib_user_db_page *user_page; + } u; + dma_addr_t dma; + int index; +}; + +enum { + MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES, +}; + +enum { + MLX5_COMP_EQ_SIZE = 1024, +}; + +struct mlx5_db_pgdir { + struct list_head list; + DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); + __be32 *db_page; + dma_addr_t db_dma; +}; + +typedef void (*mlx5_cmd_cbk_t)(int status, void *context); + +struct mlx5_cmd_work_ent { + struct mlx5_cmd_msg *in; + struct mlx5_cmd_msg *out; + mlx5_cmd_cbk_t callback; + void *context; + int idx; + struct completion done; + struct mlx5_cmd *cmd; + struct work_struct work; + struct mlx5_cmd_layout *lay; + int ret; + int page_queue; + u8 status; + u8 token; + struct timespec ts1; + struct timespec ts2; +}; + +struct mlx5_pas { + u64 pa; + u8 log_sz; +}; + +static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) +{ + if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1)) + return buf->direct.buf + offset; + else + return buf->page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} + +extern struct workqueue_struct *mlx5_core_wq; + +#define STRUCT_FIELD(header, field) \ + .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ + .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_bits; + int size_bits; +}; + +static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +extern struct dentry *mlx5_debugfs_root; + +static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->fw_rev) & 0xffff; +} + +static inline u16 fw_rev_min(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->fw_rev) >> 16; +} + +static inline u16 fw_rev_sub(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff; +} + +static inline u16 cmdif_rev(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; +} + +static inline void *mlx5_vzalloc(unsigned long size) +{ + void *rtn; + + rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!rtn) + rtn = vzalloc(size); + return rtn; +} + +static inline void mlx5_vfree(const void *addr) +{ + if (addr && is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + +int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev); +void mlx5_dev_cleanup(struct mlx5_core_dev *dev); +int mlx5_cmd_init(struct mlx5_core_dev *dev); +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_use_events(struct mlx5_core_dev *dev); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size); +int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); +int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); +int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +void mlx5_health_cleanup(void); +void __init mlx5_health_init(void); +void mlx5_start_health_poll(struct mlx5_core_dev *dev); +void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, + struct mlx5_buf *buf); +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); +struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, + gfp_t flags, int npages); +void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *head); +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen); +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out); +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_create_mkey_mbox_in *in, int inlen); +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_query_mkey_mbox_out *out, int outlen); +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + u32 *mkey); +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, + u16 opmod, int port); +void mlx5_pagealloc_init(struct mlx5_core_dev *dev); +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); +int mlx5_pagealloc_start(struct mlx5_core_dev *dev); +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, + s16 npages); +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev); +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); +void mlx5_register_debugfs(void); +void mlx5_unregister_debugfs(void); +int mlx5_eq_init(struct mlx5_core_dev *dev); +void mlx5_eq_cleanup(struct mlx5_core_dev *dev); +void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); +void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); +void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type); +void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector); +void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, struct mlx5_uar *uar); +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_start_eqs(struct mlx5_core_dev *dev); +int mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); + +int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_num, int arg, int write); +int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_query_eq_mbox_out *out, int outlen); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); + +typedef void (*health_handler_t)(struct pci_dev *pdev, void *buf, int size); +int mlx5_register_health_report_handler(health_handler_t handler); +void mlx5_unregister_health_report_handler(void); +const char *mlx5_command_str(int command); +int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); + +static inline u32 mlx5_mkey_to_idx(u32 mkey) +{ + return mkey >> 8; +} + +static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) +{ + return mkey_idx << 8; +} + +enum { + MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, + MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2, +}; + +enum { + MAX_MR_CACHE_ENTRIES = 16, +}; + +struct mlx5_profile { + u64 mask; + u32 log_max_qp; + int cmdif_csum; + struct { + int size; + int limit; + } mr_cache[MAX_MR_CACHE_ENTRIES]; +}; + +#endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h new file mode 100644 index 000000000000..d9e3eacb3a7f --- /dev/null +++ b/include/linux/mlx5/qp.h @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_QP_H +#define MLX5_QP_H + +#include +#include + +#define MLX5_INVALID_LKEY 0x100 + +enum mlx5_qp_optpar { + MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MLX5_QP_OPTPAR_RRE = 1 << 1, + MLX5_QP_OPTPAR_RAE = 1 << 2, + MLX5_QP_OPTPAR_RWE = 1 << 3, + MLX5_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MLX5_QP_OPTPAR_Q_KEY = 1 << 5, + MLX5_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MLX5_QP_OPTPAR_SRA_MAX = 1 << 8, + MLX5_QP_OPTPAR_RRA_MAX = 1 << 9, + MLX5_QP_OPTPAR_PM_STATE = 1 << 10, + MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13, + MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX5_QP_OPTPAR_PRI_PORT = 1 << 16, + MLX5_QP_OPTPAR_SRQN = 1 << 18, + MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, + MLX5_QP_OPTPAR_DC_HS = 1 << 20, + MLX5_QP_OPTPAR_DC_KEY = 1 << 21, +}; + +enum mlx5_qp_state { + MLX5_QP_STATE_RST = 0, + MLX5_QP_STATE_INIT = 1, + MLX5_QP_STATE_RTR = 2, + MLX5_QP_STATE_RTS = 3, + MLX5_QP_STATE_SQER = 4, + MLX5_QP_STATE_SQD = 5, + MLX5_QP_STATE_ERR = 6, + MLX5_QP_STATE_SQ_DRAINING = 7, + MLX5_QP_STATE_SUSPENDED = 9, + MLX5_QP_NUM_STATE +}; + +enum { + MLX5_QP_ST_RC = 0x0, + MLX5_QP_ST_UC = 0x1, + MLX5_QP_ST_UD = 0x2, + MLX5_QP_ST_XRC = 0x3, + MLX5_QP_ST_MLX = 0x4, + MLX5_QP_ST_DCI = 0x5, + MLX5_QP_ST_DCT = 0x6, + MLX5_QP_ST_QP0 = 0x7, + MLX5_QP_ST_QP1 = 0x8, + MLX5_QP_ST_RAW_ETHERTYPE = 0x9, + MLX5_QP_ST_RAW_IPV6 = 0xa, + MLX5_QP_ST_SNIFFER = 0xb, + MLX5_QP_ST_SYNC_UMR = 0xe, + MLX5_QP_ST_PTP_1588 = 0xd, + MLX5_QP_ST_REG_UMR = 0xc, + MLX5_QP_ST_MAX +}; + +enum { + MLX5_QP_PM_MIGRATED = 0x3, + MLX5_QP_PM_ARMED = 0x0, + MLX5_QP_PM_REARM = 0x1 +}; + +enum { + MLX5_NON_ZERO_RQ = 0 << 24, + MLX5_SRQ_RQ = 1 << 24, + MLX5_CRQ_RQ = 2 << 24, + MLX5_ZERO_LEN_RQ = 3 << 24 +}; + +enum { + /* params1 */ + MLX5_QP_BIT_SRE = 1 << 15, + MLX5_QP_BIT_SWE = 1 << 14, + MLX5_QP_BIT_SAE = 1 << 13, + /* params2 */ + MLX5_QP_BIT_RRE = 1 << 15, + MLX5_QP_BIT_RWE = 1 << 14, + MLX5_QP_BIT_RAE = 1 << 13, + MLX5_QP_BIT_RIC = 1 << 4, +}; + +enum { + MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_SOLICITED = 1 << 1, +}; + +enum { + MLX5_SEND_WQE_BB = 64, +}; + +enum { + MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, + MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, + MLX5_WQE_FMR_PERM_REMOTE_READ = 1 << 29, + MLX5_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, + MLX5_WQE_FMR_PERM_ATOMIC = 1 << 31 +}; + +enum { + MLX5_FENCE_MODE_NONE = 0 << 5, + MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, + MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, +}; + +enum { + MLX5_QP_LAT_SENSITIVE = 1 << 28, + MLX5_QP_ENABLE_SIG = 1 << 31, +}; + +enum { + MLX5_RCV_DBR = 0, + MLX5_SND_DBR = 1, +}; + +struct mlx5_wqe_fmr_seg { + __be32 flags; + __be32 mem_key; + __be64 buf_list; + __be64 start_addr; + __be64 reg_len; + __be32 offset; + __be32 page_size; + u32 reserved[2]; +}; + +struct mlx5_wqe_ctrl_seg { + __be32 opmod_idx_opcode; + __be32 qpn_ds; + u8 signature; + u8 rsvd[2]; + u8 fm_ce_se; + __be32 imm; +}; + +struct mlx5_wqe_xrc_seg { + __be32 xrc_srqn; + u8 rsvd[12]; +}; + +struct mlx5_wqe_masked_atomic_seg { + __be64 swap_add; + __be64 compare; + __be64 swap_add_mask; + __be64 compare_mask; +}; + +struct mlx5_av { + union { + struct { + __be32 qkey; + __be32 reserved; + } qkey; + __be64 dc_key; + } key; + __be32 dqp_dct; + u8 stat_rate_sl; + u8 fl_mlid; + __be16 rlid; + u8 reserved0[10]; + u8 tclass; + u8 hop_limit; + __be32 grh_gid_fl; + u8 rgid[16]; +}; + +struct mlx5_wqe_datagram_seg { + struct mlx5_av av; +}; + +struct mlx5_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mlx5_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mlx5_wqe_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mlx5_wqe_umr_ctrl_seg { + u8 flags; + u8 rsvd0[3]; + __be16 klm_octowords; + __be16 bsf_octowords; + __be64 mkey_mask; + u8 rsvd1[32]; +}; + +struct mlx5_seg_set_psv { + __be32 psv_num; + __be16 syndrome; + __be16 status; + __be32 transient_sig; + __be32 ref_tag; +}; + +struct mlx5_seg_get_psv { + u8 rsvd[19]; + u8 num_psv; + __be32 l_key; + __be64 va; + __be32 psv_index[4]; +}; + +struct mlx5_seg_check_psv { + u8 rsvd0[2]; + __be16 err_coalescing_op; + u8 rsvd1[2]; + __be16 xport_err_op; + u8 rsvd2[2]; + __be16 xport_err_mask; + u8 rsvd3[7]; + u8 num_psv; + __be32 l_key; + __be64 va; + __be32 psv_index[4]; +}; + +struct mlx5_rwqe_sig { + u8 rsvd0[4]; + u8 signature; + u8 rsvd1[11]; +}; + +struct mlx5_wqe_signature_seg { + u8 rsvd0[4]; + u8 signature; + u8 rsvd1[11]; +}; + +struct mlx5_wqe_inline_seg { + __be32 byte_count; +}; + +struct mlx5_core_qp { + void (*event) (struct mlx5_core_qp *, int); + int qpn; + atomic_t refcount; + struct completion free; + struct mlx5_rsc_debug *dbg; + int pid; +}; + +struct mlx5_qp_path { + u8 fl; + u8 rsvd3; + u8 free_ar; + u8 pkey_index; + u8 rsvd0; + u8 grh_mlid; + __be16 rlid; + u8 ackto_lt; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 tclass_flowlabel; + u8 rgid[16]; + u8 rsvd1[4]; + u8 sl; + u8 port; + u8 rsvd2[6]; +}; + +struct mlx5_qp_context { + __be32 flags; + __be32 flags_pd; + u8 mtu_msgmax; + u8 rq_size_stride; + __be16 sq_crq_size; + __be32 qp_counter_set_usr_page; + __be32 wire_qpn; + __be32 log_pg_sz_remote_qpn; + struct mlx5_qp_path pri_path; + struct mlx5_qp_path alt_path; + __be32 params1; + u8 reserved2[4]; + __be32 next_send_psn; + __be32 cqn_send; + u8 reserved3[8]; + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 xrcd; + __be32 cqn_recv; + __be64 db_rec_addr; + __be32 qkey; + __be32 rq_type_srqn; + __be32 rmsn; + __be16 hw_sq_wqe_counter; + __be16 sw_sq_wqe_counter; + __be16 hw_rcyclic_byte_counter; + __be16 hw_rq_counter; + __be16 sw_rcyclic_byte_counter; + __be16 sw_rq_counter; + u8 rsvd0[5]; + u8 cgs; + u8 cs_req; + u8 cs_res; + __be64 dc_access_key; + u8 rsvd1[24]; +}; + +struct mlx5_create_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_qpn; + u8 rsvd0[4]; + __be32 opt_param_mask; + u8 rsvd1[4]; + struct mlx5_qp_context ctx; + u8 rsvd3[16]; + __be64 pas[0]; +}; + +struct mlx5_create_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 qpn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_modify_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd1[4]; + __be32 optparam; + u8 rsvd0[4]; + struct mlx5_qp_context ctx; +}; + +struct mlx5_modify_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_query_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd[4]; +}; + +struct mlx5_query_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd1[8]; + __be32 optparam; + u8 rsvd0[4]; + struct mlx5_qp_context ctx; + u8 rsvd2[16]; + __be64 pas[0]; +}; + +struct mlx5_conf_sqp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd[3]; + u8 type; +}; + +struct mlx5_conf_sqp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_xrcd_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_xrcd_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 xrcdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_xrcd_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 xrcdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_xrcd_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) +{ + return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); +} + +int mlx5_core_create_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + struct mlx5_create_qp_mbox_in *in, + int inlen); +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, + enum mlx5_qp_state new_state, + struct mlx5_modify_qp_mbox_in *in, int sqd_event, + struct mlx5_core_qp *qp); +int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp); +int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + struct mlx5_query_qp_mbox_out *out, int outlen); + +int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); +int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); +void mlx5_init_qp_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); + +#endif /* MLX5_QP_H */ diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h new file mode 100644 index 000000000000..e1a363a33663 --- /dev/null +++ b/include/linux/mlx5/srq.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_SRQ_H +#define MLX5_SRQ_H + +#include + +void mlx5_init_srq_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev); + +#endif /* MLX5_SRQ_H */ -- cgit v1.2.3 From 63884c90ffa3f73a81b81f169c51c34d2b9cf75e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 1 Jul 2013 14:15:17 -0700 Subject: mlx5: Fix parameter type of health_handler_t This deals with the sparse warning: drivers/net/ethernet/mellanox/mlx5/core/health.c:94:54: warning: incorrect type in argument 2 (different address spaces) drivers/net/ethernet/mellanox/mlx5/core/health.c:94:54: expected void *buf drivers/net/ethernet/mellanox/mlx5/core/health.c:94:54: got struct health_buffer [noderef] *health Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e47f1e4c9b03..f22e4419839b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -729,7 +729,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); -typedef void (*health_handler_t)(struct pci_dev *pdev, void *buf, int size); +typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size); int mlx5_register_health_report_handler(health_handler_t handler); void mlx5_unregister_health_report_handler(void); const char *mlx5_command_str(int command); -- cgit v1.2.3 From ad32b95f8281ac5a292da83ff27616aceee4b770 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 8 Jul 2013 00:13:35 -0700 Subject: IB/mlx5: Make profile[] static in main.c Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6b1007f9bc29..8000fff4d444 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -62,7 +62,7 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; -struct mlx5_profile profile[] = { +static struct mlx5_profile profile[] = { [0] = { .mask = 0, }, -- cgit v1.2.3 From 582c016e68dc5dfea4d3582512157f165a428149 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 8 Jul 2013 10:52:28 -0700 Subject: mlx5_core: Fixes for sparse warnings - use be32_to_cpu() instead of cpu_to_be32() where appropriate. - use proper accessors for pointers marked __iomem. Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/health.c | 28 +++++++++++++++-------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 5e9cf2b9aaf7..a550a8ef94a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -323,11 +323,11 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, param = 0; break; case QP_LOG_PG_SZ: - param = ((cpu_to_be32(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f); + param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f; param += 12; break; case QP_RQPN: - param = cpu_to_be32(ctx->log_pg_sz_remote_qpn) & 0xffffff; + param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index ea4b9bca6d4a..748f10a155c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -132,6 +132,16 @@ static const char *hsynd_str(u8 synd) } } +static u16 read_be16(__be16 __iomem *p) +{ + return swab16(readl((__force u16 __iomem *) p)); +} + +static u32 read_be32(__be32 __iomem *p) +{ + return swab32(readl((__force u32 __iomem *) p)); +} + static void print_health_info(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -139,15 +149,15 @@ static void print_health_info(struct mlx5_core_dev *dev) int i; for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) - pr_info("assert_var[%d] 0x%08x\n", i, be32_to_cpu(h->assert_var[i])); - - pr_info("assert_exit_ptr 0x%08x\n", be32_to_cpu(h->assert_exit_ptr)); - pr_info("assert_callra 0x%08x\n", be32_to_cpu(h->assert_callra)); - pr_info("fw_ver 0x%08x\n", be32_to_cpu(h->fw_ver)); - pr_info("hw_id 0x%08x\n", be32_to_cpu(h->hw_id)); - pr_info("irisc_index %d\n", h->irisc_index); - pr_info("synd 0x%x: %s\n", h->synd, hsynd_str(h->synd)); - pr_info("ext_sync 0x%04x\n", be16_to_cpu(h->ext_sync)); + pr_info("assert_var[%d] 0x%08x\n", i, read_be32(h->assert_var + i)); + + pr_info("assert_exit_ptr 0x%08x\n", read_be32(&h->assert_exit_ptr)); + pr_info("assert_callra 0x%08x\n", read_be32(&h->assert_callra)); + pr_info("fw_ver 0x%08x\n", read_be32(&h->fw_ver)); + pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id)); + pr_info("irisc_index %d\n", readb(&h->irisc_index)); + pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd))); + pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync)); } static void poll_health(unsigned long data) -- cgit v1.2.3 From da183c7af8444cb2c1beedaa498a9359f19ff665 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 8 Jul 2013 11:15:45 -0700 Subject: IB/uverbs: Use get_unused_fd_flags(O_CLOEXEC) instead of get_unused_fd() The macro get_unused_fd() is used to allocate a file descriptor with default flags. Those default flags (0) can be "unsafe": O_CLOEXEC must be used by default to not leak file descriptor across exec(). Replace calls to get_unused_fd() in uverbs with calls to get_unused_fd_flags(O_CLOEXEC). Inheriting uverbs fds across exec() cannot be used to do anything useful. Based on a patch/suggestion from Yann Droneaud . Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a7d00f6b3bc1..b3c07b0c9f26 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -334,7 +334,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, resp.num_comp_vectors = file->device->num_comp_vectors; - ret = get_unused_fd(); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; resp.async_fd = ret; @@ -1184,7 +1184,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = get_unused_fd(); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) return ret; resp.fd = ret; -- cgit v1.2.3