From 65b302ad31b02b0790417f4e65833af494cb35ce Mon Sep 17 00:00:00 2001 From: Christoph Jaeger Date: Mon, 21 Apr 2014 17:02:42 +0200 Subject: RDMA/cxgb4: Fix memory leaks in c4iw_alloc() error paths c4iw_alloc() bails out without freeing the storage that 'devp' points to. Picked up by Coverity - CID 1204241. Fixes: fa658a98a2 ("RDMA/cxgb4: Use the BAR2/WC path for kernel QPs and T5 devices") Signed-off-by: Christoph Jaeger Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index f4fa50a609e2..8914ea90ddd9 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -736,6 +736,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pci_resource_len(devp->rdev.lldi.pdev, 2)); if (!devp->rdev.bar2_kva) { pr_err(MOD "Unable to ioremap BAR2\n"); + ib_dealloc_device(&devp->ibdev); return ERR_PTR(-EINVAL); } } else if (ocqp_supported(infop)) { @@ -747,6 +748,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.vr->ocq.size); if (!devp->rdev.oc_mw_kva) { pr_err(MOD "Unable to ioremap onchip mem\n"); + ib_dealloc_device(&devp->ibdev); return ERR_PTR(-EINVAL); } } -- cgit v1.2.3 From 0cc65dd6918f529ae2c19be95b86dec19549b7ed Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Wed, 16 Apr 2014 09:37:49 +0800 Subject: RDMA/ocrdma: Convert to use simple_open() This removes an open-coded duplicate of simple_open(). Signed-off-by: Duan Jiong Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 6c54106f5e64..41a9aec9998d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -510,16 +510,9 @@ exit: return status; } -static int ocrdma_debugfs_open(struct inode *inode, struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - return 0; -} - static const struct file_operations ocrdma_dbg_ops = { .owner = THIS_MODULE, - .open = ocrdma_debugfs_open, + .open = simple_open, .read = ocrdma_dbgfs_ops_read, }; -- cgit v1.2.3 From 11b8e22d4d0979d8201cbdf0b5fffdbe2d5bcedf Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 16 May 2014 12:42:46 -0500 Subject: RDMA/cxgb4: Fix vlan support RDMA connections over a vlan interface don't work due to import_ep() not using the correct egress device. - use the real device in import_ep() - use rdma_vlan_dev_real_dev() in get_real_dev(). Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1f863a96a480..28114e6b02e9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -47,6 +47,8 @@ #include #include +#include + #include "iw_cxgb4.h" static char *states[] = { @@ -341,10 +343,7 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) static struct net_device *get_real_dev(struct net_device *egress_dev) { - struct net_device *phys_dev = egress_dev; - if (egress_dev->priv_flags & IFF_802_1Q_VLAN) - phys_dev = vlan_dev_real_dev(egress_dev); - return phys_dev; + return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) @@ -1746,16 +1745,16 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, if (!ep->l2t) goto out; ep->mtu = dst_mtu(dst); - ep->tx_chan = cxgb4_port_chan(n->dev); - ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(n->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(n->dev); + ep->txq_idx = cxgb4_port_idx(pdev) * step; + ep->ctrlq_idx = cxgb4_port_idx(pdev); step = cdev->rdev.lldi.nrxq / cdev->rdev.lldi.nchan; ep->rss_qid = cdev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(n->dev) * step]; + cxgb4_port_idx(pdev) * step]; if (clear_mpa_v1) { ep->retry_with_mpa_v1 = 0; -- cgit v1.2.3 From 8524867b9c3d11e38de084f47e2128f43e74610a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 18 May 2014 18:32:38 +0300 Subject: mlx5_core: Fix signature handover operation for interleaved buffers When the data and protection are interleaved in the memory domain, no need to expand the mkey total length. At the moment no Linux user works (iSER initiator & target) in interleaved mode. This may change in the future as for SCSI pass-through devices there is no real point in target performing de-interleaving and re-interleaving of the protection data in the PT stage. Regardless, signature verbs support this mode. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dc930ed21eca..74ee4a474e68 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2275,7 +2275,10 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, /* length of the protected region, data + protection */ region_len = wr->sg_list->length; - if (wr->wr.sig_handover.prot) + if (wr->wr.sig_handover.prot && + (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || + wr->wr.sig_handover.prot->addr != wr->sg_list->addr || + wr->wr.sig_handover.prot->length != wr->sg_list->length)) region_len += wr->wr.sig_handover.prot->length; /** -- cgit v1.2.3 From 5c273b16771eaeb3957d365bb3695b92aff037cf Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 18 May 2014 18:32:39 +0300 Subject: mlx5_core: Simplify signature handover wqe for interleaved buffers No need for repetition format pattern in case the data and protection are already interleaved in the memory domain since the pattern already exists. A single key entry is sufficient and may save some extra fetch ops. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 74ee4a474e68..00b5563acab7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2131,9 +2131,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, int ret; int wqe_size; - if (!wr->wr.sig_handover.prot) { + if (!wr->wr.sig_handover.prot || + (data_key == wr->wr.sig_handover.prot->lkey && + data_va == wr->wr.sig_handover.prot->addr && + data_len == wr->wr.sig_handover.prot->length)) { /** * Source domain doesn't contain signature information + * or data and protection are interleaved in memory. * So need construct: * ------------------ * | data_klm | @@ -2187,23 +2191,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, data_sentry->bcount = cpu_to_be16(block_size); data_sentry->key = cpu_to_be32(data_key); data_sentry->va = cpu_to_be64(data_va); + data_sentry->stride = cpu_to_be16(block_size); + prot_sentry->bcount = cpu_to_be16(prot_size); prot_sentry->key = cpu_to_be32(prot_key); + prot_sentry->va = cpu_to_be64(prot_va); + prot_sentry->stride = cpu_to_be16(prot_size); - if (prot_key == data_key && prot_va == data_va) { - /** - * The data and protection are interleaved - * in a single memory region - **/ - prot_sentry->va = cpu_to_be64(data_va + block_size); - prot_sentry->stride = cpu_to_be16(block_size + prot_size); - data_sentry->stride = prot_sentry->stride; - } else { - /* The data and protection are two different buffers */ - prot_sentry->va = cpu_to_be64(prot_va); - data_sentry->stride = cpu_to_be16(block_size); - prot_sentry->stride = cpu_to_be16(prot_size); - } wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + sizeof(*prot_sentry), 64); } -- cgit v1.2.3 From c7f44fbda68a6b2d6ceb10e45c711750e779bace Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 18 May 2014 18:32:40 +0300 Subject: mlx5_core: Copy DIF fields only when input and output space values match Some DIF implementations (SCSI initiator/target) may want to use different input/output values for application tag and/or reference tag. So in case memory/wire domain values don't match HW must not copy them. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 00b5563acab7..a89f70473fb7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2078,6 +2078,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, struct ib_sig_domain *wire = &sig_attrs->wire; int ret, selector; + memset(bsf, 0, sizeof(*bsf)); switch (sig_attrs->mem.sig_type) { case IB_SIG_TYPE_T10_DIF: if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) @@ -2090,9 +2091,11 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, /* Same block structure */ basic->bsf_size_sbs = 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask = 0xff; - else - basic->wire.copy_byte_mask = 0x3f; + basic->wire.copy_byte_mask |= 0xc0; + if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) + basic->wire.copy_byte_mask |= 0x30; + if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) + basic->wire.copy_byte_mask |= 0x0f; } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); -- cgit v1.2.3 From 096f7e72c604e983e14b84b84fc37593fc433585 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 22 May 2014 14:50:08 +0300 Subject: IB/mlx5: Fix error handling in reg_umr If ib_post_send fails when posting the UMR work request in reg_umr, the code doesn't release the temporary pas buffer allocated, and doesn't dma_unmap it. Signed-off-by: Haggai Eran Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mr.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 81392b26d078..ad5898592016 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -730,7 +730,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct mlx5_ib_mr *mr; struct ib_sge sg; int size = sizeof(u64) * npages; - int err; + int err = 0; int i; for (i = 0; i < 1; i++) { @@ -751,7 +751,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); if (!mr->pas) { err = -ENOMEM; - goto error; + goto free_mr; } mlx5_ib_populate_pas(dev, umem, page_shift, @@ -760,9 +760,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, DMA_TO_DEVICE); if (dma_mapping_error(ddev, mr->dma)) { - kfree(mr->pas); err = -ENOMEM; - goto error; + goto free_pas; } memset(&wr, 0, sizeof(wr)); @@ -778,26 +777,28 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, err = ib_post_send(umrc->qp, &wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); - up(&umrc->sem); - goto error; + goto unmap_dma; } wait_for_completion(&mr->done); - up(&umrc->sem); + if (mr->status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + } +unmap_dma: + up(&umrc->sem); dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + +free_pas: kfree(mr->pas); - if (mr->status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed\n"); - err = -EFAULT; - goto error; +free_mr: + if (err) { + free_cached_mr(dev, mr); + return ERR_PTR(err); } return mr; - -error: - free_cached_mr(dev, mr); - return ERR_PTR(err); } static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, -- cgit v1.2.3 From 8605933a22796243982e7ed838deca5549c64c62 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 22 May 2014 14:50:09 +0300 Subject: IB/mlx5: Add MR to radix tree in reg_mr_callback For memory regions that are allocated using reg_umr, the suffix of mlx5_core_create_mkey isn't being called. Instead the creation is completed in a callback function (reg_mr_callback). This means that these MRs aren't being added to the MR radix tree. Add them in the callback. Signed-off-by: Haggai Eran Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mr.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ad5898592016..9d932a2aa9f4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,6 +73,8 @@ static void reg_mr_callback(int status, void *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; + struct mlx5_mr_table *table = &dev->mdev.priv.mr_table; + int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; @@ -107,6 +109,13 @@ static void reg_mr_callback(int status, void *context) ent->cur++; ent->size++; spin_unlock_irqrestore(&ent->lock, flags); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), + &mr->mmr); + if (err) + pr_err("Error inserting to mr tree. 0x%x\n", -err); + write_unlock_irqrestore(&table->lock, flags); } static int add_keys(struct mlx5_ib_dev *dev, int c, int num) -- cgit v1.2.3 From b475598aec63f2efbc78f0ff1895d917d2370846 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 22 May 2014 14:50:10 +0300 Subject: mlx5_core: Store MR attributes in mlx5_mr_core during creation and after UMR The patch stores iova, pd and size during mr creation and after UMRs that modify them. It removes the unused access flags field. Signed-off-by: Haggai Eran Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mr.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 4 ++++ include/linux/mlx5/driver.h | 1 - 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9d932a2aa9f4..f472ab246d94 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -794,6 +794,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, err = -EFAULT; } + mr->mmr.iova = virt_addr; + mr->mmr.size = len; + mr->mmr.pd = to_mpd(pd)->pdn; + unmap_dma: up(&umrc->sem); dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 4cc927649404..ac52a0fe2d3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -82,7 +82,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } + mr->iova = be64_to_cpu(in->seg.start_addr); + mr->size = be64_to_cpu(in->seg.len); mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(lout.mkey), key, mr->key); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 93cef6313e72..2bce4aad2570 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -427,7 +427,6 @@ struct mlx5_core_mr { u64 size; u32 key; u32 pd; - u32 access; }; struct mlx5_core_srq { -- cgit v1.2.3 From 48fea837bb2709bda73cd4ae8bbd57cb277f7b90 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 22 May 2014 14:50:11 +0300 Subject: IB/mlx5: Set QP offsets and parameters for user QPs and not just for kernel QPs For user QPs, the creation process does not currently initialize the fields: * qp->rq.offset * qp->sq.offset * qp->sq.wqe_shift Signed-off-by: Haggai Eran Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a89f70473fb7..d13ddf1c0033 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -574,6 +574,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uar_index = uuarn_to_uar_index(&context->uuari, uuarn); mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + qp->rq.offset = 0; + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + err = set_user_buf_size(dev, qp, &ucmd); if (err) goto err_uuar; -- cgit v1.2.3 From a74d24168d2df78e7a532567eb0e7538e6b09568 Mon Sep 17 00:00:00 2001 From: Shachar Raindel Date: Thu, 22 May 2014 14:50:12 +0300 Subject: IB/mlx5: Refactor UMR to have its own context struct Instead of having the UMR context part of each memory region, allocate a struct on the stack. This allows queuing multiple UMRs that access the same memory region. Signed-off-by: Shachar Raindel Signed-off-by: Haggai Eran Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 13 ++++++++++-- drivers/infiniband/hw/mlx5/mr.c | 40 ++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 50541586e0a6..f2ccf1a5a291 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -264,8 +264,6 @@ struct mlx5_ib_mr { __be64 *pas; dma_addr_t dma; int npages; - struct completion done; - enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; struct mlx5_core_sig_ctx *sig; @@ -277,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list { dma_addr_t map; }; +struct mlx5_ib_umr_context { + enum ib_wc_status status; + struct completion done; +}; + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->status = -1; + init_completion(&context->done); +} + struct umr_common { struct ib_pd *pd; struct ib_cq *cq; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index f472ab246d94..14ee4fdcf172 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -708,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) { - struct mlx5_ib_mr *mr; + struct mlx5_ib_umr_context *context; struct ib_wc wc; int err; @@ -721,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) if (err == 0) break; - mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id; - mr->status = wc.status; - complete(&mr->done); + context = (struct mlx5_ib_umr_context *)wc.wr_id; + context->status = wc.status; + complete(&context->done); } ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); } @@ -735,6 +735,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; struct ib_send_wr wr, *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; @@ -774,24 +775,21 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, } memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)mr; + wr.wr_id = (u64)(unsigned long)&umr_context; prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); - /* We serialize polls so one process does not kidnap another's - * completion. This is not a problem since wr is completed in - * around 1 usec - */ + mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - init_completion(&mr->done); err = ib_post_send(umrc->qp, &wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); goto unmap_dma; - } - wait_for_completion(&mr->done); - if (mr->status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed\n"); - err = -EFAULT; + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + } } mr->mmr.iova = virt_addr; @@ -940,24 +938,26 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; struct ib_send_wr wr, *bad; int err; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)mr; + wr.wr_id = (u64)(unsigned long)&umr_context; prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - init_completion(&mr->done); err = ib_post_send(umrc->qp, &wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); goto error; + } else { + wait_for_completion(&umr_context.done); + up(&umrc->sem); } - wait_for_completion(&mr->done); - up(&umrc->sem); - if (mr->status != IB_WC_SUCCESS) { + if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "unreg umr failed\n"); err = -EFAULT; goto error; -- cgit v1.2.3 From a8237b32a3faab155a5dc8f886452147ce73da3e Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 5 May 2014 19:33:21 +0200 Subject: IB/mlx5: add missing padding at end of struct mlx5_ib_create_cq The i386 ABI disagrees with most other ABIs regarding alignment of data type larger than 4 bytes: on most ABIs a padding must be added at end of the structures, while it is not required on i386. So for most ABI struct mlx5_ib_create_cq get padded to be aligned on a 8 bytes multiple, while for i386, such padding is not added. The tool pahole can be used to find such implicit padding: $ pahole --anon_include \ --nested_anon_include \ --recursive \ --class_name mlx5_ib_create_cq \ drivers/infiniband/hw/mlx5/mlx5_ib.o Then, structure layout can be compared between i386 and x86_64: +++ obj-i386/drivers/infiniband/hw/mlx5/mlx5_ib.o.pahole.txt 2014-03-28 11:43:07.386413682 +0100 --- obj-x86_64/drivers/infiniband/hw/mlx5/mlx5_ib.o.pahole.txt 2014-03-27 13:06:17.788472721 +0100 @@ -34,9 +34,8 @@ struct mlx5_ib_create_cq { __u64 db_addr; /* 8 8 */ __u32 cqe_size; /* 16 4 */ - /* size: 20, cachelines: 1, members: 3 */ - /* last cacheline: 20 bytes */ + /* size: 24, cachelines: 1, members: 3 */ + /* padding: 4 */ + /* last cacheline: 24 bytes */ }; This ABI disagreement will make an x86_64 kernel try to read past the buffer provided by an i386 binary. When boundary check will be implemented, a x86_64 kernel will refuse to read past the i386 userspace provided buffer and the uverb will fail. Anyway, if the structure lies in memory on a page boundary and next page is not mapped, ib_copy_from_udata() will fail when trying to read the 4 bytes of padding and the uverb will fail. This patch makes create_cq_user() takes care of the input data size to handle the case where no padding is provided. This way, x86_64 kernel will be able to handle struct mlx5_ib_create_cq as sent by unpatched and patched i386 libmlx5. Link: http://marc.info/?i=cover.1399309513.git.ydroneaud@opteya.com Cc: Fixes: e126ba97dba9e ("mlx5: Add driver for Mellanox Connect-IB adapter") Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 13 ++++++++++++- drivers/infiniband/hw/mlx5/user.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 62bb6b49dc1d..8ae4f896cb41 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_ib.h" #include "user.h" @@ -602,14 +603,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; + size_t ucmdlen; int page_shift; int npages; int ncont; int err; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 0f4f8e42a17f..d44ecd2c2faf 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -91,6 +91,7 @@ struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_cq_resp { -- cgit v1.2.3 From 43bc889380c2ad9aa230eccc03a15cc52cf710d4 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 5 May 2014 19:33:22 +0200 Subject: IB/mlx5: add missing padding at end of struct mlx5_ib_create_srq The i386 ABI disagrees with most other ABIs regarding alignment of data type larger than 4 bytes: on most ABIs a padding must be added at end of the structures, while it is not required on i386. So for most ABIs struct mlx5_ib_create_srq gets implicitly padded to be aligned on a 8 bytes multiple, while for i386, such padding is not added. Tool pahole could be used to find such implicit padding: $ pahole --anon_include \ --nested_anon_include \ --recursive \ --class_name mlx5_ib_create_srq \ drivers/infiniband/hw/mlx5/mlx5_ib.o Then, structure layout can be compared between i386 and x86_64: +++ obj-i386/drivers/infiniband/hw/mlx5/mlx5_ib.o.pahole.txt 2014-03-28 11:43:07.386413682 +0100 --- obj-x86_64/drivers/infiniband/hw/mlx5/mlx5_ib.o.pahole.txt 2014-03-27 13:06:17.788472721 +0100 @@ -69,7 +68,6 @@ struct mlx5_ib_create_srq { __u64 db_addr; /* 8 8 */ __u32 flags; /* 16 4 */ - /* size: 20, cachelines: 1, members: 3 */ - /* last cacheline: 20 bytes */ + /* size: 24, cachelines: 1, members: 3 */ + /* padding: 4 */ + /* last cacheline: 24 bytes */ }; ABI disagreement will make an x86_64 kernel try to read past the buffer provided by an i386 binary. When boundary check will be implemented, the x86_64 kernel will refuse to read past the i386 userspace provided buffer and the uverb will fail. Anyway, if the structure lay in memory on a page boundary and next page is not mapped, ib_copy_from_udata() will fail and the uverb will fail. This patch makes create_srq_user() takes care of the input data size to handle the case where no padding was provided. This way, x86_64 kernel will be able to handle struct mlx5_ib_create_srq as sent by unpatched and patched i386 libmlx5. Link: http://marc.info/?i=cover.1399309513.git.ydroneaud@opteya.com Cc: Fixes: e126ba97dba9e ("mlx5: Add driver for Mellanox Connect-IB adapter") Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/srq.c | 14 +++++++++++++- drivers/infiniband/hw/mlx5/user.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 210b3eaf188a..384af6dec5eb 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "mlx5_ib.h" #include "user.h" @@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd; + size_t ucmdlen; int err; int npages; int page_shift; int ncont; u32 offset; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); return -EFAULT; } + + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index d44ecd2c2faf..d0ba264ac1ed 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -110,6 +110,7 @@ struct mlx5_ib_create_srq { __u64 buf_addr; __u64 db_addr; __u32 flags; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_srq_resp { -- cgit v1.2.3 From e4514cbd972786af67dd6c442c072685387e22a2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 May 2014 00:04:44 +0300 Subject: RDMA/cxgb3: Fix information leak in send_abort() The cpl_abort_req struct has several reserved members which need to be cleared to avoid disclosing kernel information. I have added a memset() so now it matches the cxgb4 version of this function. Signed-off-by: Dan Carpenter Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 095bb046e2c8..cb78b1e9bcd9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -418,6 +418,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, abort_arp_failure); req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); -- cgit v1.2.3 From 911eccd284d13d78c92ec4f1f1092c03457d732a Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 2 May 2014 11:28:04 -0400 Subject: IB/qib: Fix port in pkey change event The code used a literal 1 in dispatching an IB_EVENT_PKEY_CHANGE. As of the dual port qib QDR card, this is not necessarily correct. Change to use the port as specified in the call. Cc: Reported-by: Alex Estrin Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index edad991d60ed..22c720e5740d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) event.event = IB_EVENT_PKEY_CHANGE; event.device = &dd->verbs_dev.ibdev; - event.element.port_num = 1; + event.element.port_num = port; ib_dispatch_event(&event); } return 0; -- cgit v1.2.3 From 7e6d3e5c70f13874fb06e6b67696ed90ce79bd48 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 2 May 2014 11:40:17 -0400 Subject: IB/ipath: Translate legacy diagpkt into newer extended diagpkt This patch addresses an issue where the legacy diagpacket is sent in from the user, but the driver operates on only the extended diagpkt. This patch specifically initializes the extended diagpkt based on the legacy packet. Cc: Reported-by: Rickard Strandqvist Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_diag.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index e2f9a51f4a38..45802e97332e 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -346,6 +346,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp, ret = -EFAULT; goto bail; } + dp.len = odp.len; + dp.unit = odp.unit; + dp.data = odp.data; + dp.pbc_wd = 0; } else { ret = -EINVAL; goto bail; -- cgit v1.2.3 From ed477c4c83b31ebc2b143382b0b5d70325fc9643 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Sat, 19 Apr 2014 01:12:18 +0000 Subject: IB/usnic: Fix source file missing copyright and license Prepends copyright and license to usnic_uiom_interval_tree.c Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c index d135ad90d914..3a4288e0fbac 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + #include #include #include -- cgit v1.2.3 From 6c9b5d9b00ed2b1cbd5e5d2c176bf88da7beb224 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 28 May 2014 09:23:03 -0700 Subject: IB/mlx5: Fix warning about cast of wr_id back to pointer on 32 bits We need to cast wr_id to unsigned long before casting to a pointer. This fixes: drivers/infiniband/hw/mlx5/mr.c: In function 'mlx5_umr_cq_handler': >> drivers/infiniband/hw/mlx5/mr.c:724:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] context = (struct mlx5_ib_umr_context *)wc.wr_id; Reported-by: kbuild test robot Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 14ee4fdcf172..afa873bd028e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -721,7 +721,7 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) if (err == 0) break; - context = (struct mlx5_ib_umr_context *)wc.wr_id; + context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; context->status = wc.status; complete(&context->done); } -- cgit v1.2.3 From 49410185c356b2767409de9220d5cf5c8db062e5 Mon Sep 17 00:00:00 2001 From: Manuel Schölling Date: Sun, 25 May 2014 15:26:41 +0200 Subject: IB/ipath: Use time_before()/_after() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Time comparisons must use time_after / time_before to avoid problems when jiffies wraps. Signed-off-by: Manuel Schölling Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_intr.c | 4 ++-- drivers/infiniband/hw/ipath/ipath_sdma.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 26dfbc8ee0f1..01ba792791a0 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -70,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { int i; if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && - dd->ipath_lastcancel > jiffies) { + time_after(dd->ipath_lastcancel, jiffies)) { __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, "SendbufErrs %lx %lx", sbuf[0], sbuf[1]); @@ -755,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) /* likely due to cancel; so suppress message unless verbose */ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && - dd->ipath_lastcancel > jiffies) { + time_after(dd->ipath_lastcancel, jiffies)) { /* armlaunch takes precedence; it often causes both. */ ipath_cdbg(VERBOSE, "Suppressed %s error (%llx) after sendbuf cancel\n", diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 98ac18ec977e..17a517766ad2 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -247,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque) /* ipath_sdma_abort() is done, waiting for interrupt */ if (status == IPATH_SDMA_ABORT_DISARMED) { - if (jiffies < dd->ipath_sdma_abort_intr_timeout) + if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout)) goto resched_noprint; /* give up, intr got lost somewhere */ ipath_dbg("give up waiting for SDMADISABLED intr\n"); @@ -341,7 +341,7 @@ resched: * JAG - this is bad to just have default be a loop without * state change */ - if (jiffies > dd->ipath_sdma_abort_jiffies) { + if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) { ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status); dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; -- cgit v1.2.3 From bfdfcfee3c9281e9cd28c0b08235aba1762504a6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 17 May 2014 23:33:44 +0100 Subject: IB/mlx4: fix unitialised variable is_mcast Commit 297e0dad7206 ("IB/mlx4: Handle Ethernet L2 parameters for IP based GID addressing") introduced a bug where is_mcast is now no longer initialized on the non-multicast condition and so it can be any random value from the stack. This issue was detected by cppcheck: [drivers/infiniband/hw/mlx4/ah.c:103]: (error) Uninitialized variable: is_mcast Simple fix is to initialise is_mcast to zero. Signed-off-by: Colin Ian King Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/ah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 170dca608042..2d8c3397774f 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -73,7 +73,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; - int is_mcast; + int is_mcast = 0; struct in6_addr in6; u16 vlan_tag; -- cgit v1.2.3 From 3c735d481b9a507dccee63eeaf6aa6ffa2bccc67 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Feb 2014 15:39:34 +0300 Subject: RDMA/cxgb3: Remove a couple unneeded conditions We know that "reset_tpt_entry" is false on this side of the if else statement so there is no need to check again. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index c3f5aca4ef00..de1c61b417d6 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -735,14 +735,12 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); + tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = cpu_to_be32(len); tpt.va_hi = cpu_to_be32((u32) (to >> 32)); tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); + tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); } err = cxio_hal_ctrl_qp_write_mem(rdev_p, stag_idx + -- cgit v1.2.3 From 0a66d2bd300cbdaa3146c81cb823d01593963066 Mon Sep 17 00:00:00 2001 From: Vinit Agnihotri Date: Thu, 29 May 2014 15:46:09 -0400 Subject: IB/qib: Additional Intel branding changes This patches changes user visible function names containing "qlogic" in module init and cleanup. Reviewed-by: Mike Marciniszyn Signed-off-by: Vinit Agnihotri Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 5b7aeb224a30..8d3c78ddc906 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1272,7 +1272,7 @@ static int qib_notify_dca(struct notifier_block *nb, unsigned long event, * Do all the generic driver unit- and chip-independent memory * allocation and initialization. */ -static int __init qlogic_ib_init(void) +static int __init qib_ib_init(void) { int ret; @@ -1316,12 +1316,12 @@ bail: return ret; } -module_init(qlogic_ib_init); +module_init(qib_ib_init); /* * Do the non-unit driver cleanup, memory free, etc. at unload. */ -static void __exit qlogic_ib_cleanup(void) +static void __exit qib_ib_cleanup(void) { int ret; @@ -1346,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void) qib_dev_cleanup(); } -module_exit(qlogic_ib_cleanup); +module_exit(qib_ib_cleanup); /* this can only be called after a successful initialization */ static void cleanup_device_data(struct qib_devdata *dd) -- cgit v1.2.3 From 61565013cf7024e8aa52e0a8e78208a955ce7e5f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:01 +0300 Subject: IB/mlx4: SET_PORT called by mlx4_ib_modify_port should be wrapped mlx4_ib_modify_port is invoked in IB for resetting the Q_Key violations counters and for modifying the IB port capability flags. For example, when opensm is started up on the hypervisor, mlx4_ib_modify_port is called to set the port's IsSM flag. In multifunction mode, the SET_PORT command used in this flow should be wrapped (so that the PF port capability flags are also tracked, thus enabling the aggregate of all the VF/PF capability flags to be tracked properly). The procedure mlx4_SET_PORT() in main.c is also renamed to mlx4_ib_SET_PORT() to differentiate it from procedure mlx4_SET_PORT() in port.c. mlx4_ib_SET_PORT() is used exclusively by mlx4_ib_modify_port(). Finally, the CM invokes ib_modify_port() to set the IsCMSupported flag even when running over RoCE. Therefore, when RoCE is active, mlx4_ib_modify_port should return OK unconditionally (since the capability flags and qkey violations counter are not relevant). Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1b6dbe156a37..3c3806aff712 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -544,12 +544,11 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, return 0; } -static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, - u32 cap_mask) +static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, + u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; - u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) @@ -563,8 +562,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; @@ -573,11 +572,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; struct ib_port_attr attr; u32 cap_mask; int err; - mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); + /* return OK if this is RoCE. CM calls ib_modify_port() regardless + * of whether port link layer is ETH or IB. For ETH ports, qkey + * violations and port capabilities are not meaningful. + */ + if (is_eth) + return 0; + + mutex_lock(&mdev->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) @@ -586,9 +594,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; - err = mlx4_SET_PORT(to_mdev(ibdev), port, - !!(mask & IB_PORT_RESET_QKEY_CNTR), - cap_mask); + err = mlx4_ib_SET_PORT(mdev, port, + !!(mask & IB_PORT_RESET_QKEY_CNTR), + cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); -- cgit v1.2.3 From 97982f5a91e91dab26dd0246083b9adf3ba8b2e3 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:02 +0300 Subject: IB/mlx4: Preparation for VFs to issue/receive SMI (QP0) requests/responses Currently, VFs in SRIOV VFs are denied QP0 access. The main reason for this decision is security, since Subnet Management Datagrams (SMPs) are not restricted by network partitioning and may affect the physical network topology. Moreover, even the SM may be denied access from portions of the network by setting management keys unknown to the SM. However, it is desirable to grant SMI access to certain privileged VFs, so that certain network management activities may be conducted within virtual machines instead of the hypervisor. This commit does the following: 1. Create QP0 tunnel QPs for all VFs. 2. Discard SMI mads sent-from/received-for non-privileged VFs in the hypervisor MAD multiplex/demultiplex logic. SMI mads from/for privileged VFs are allowed to pass. 3. MAD_IFC wrapper changes/fixes. For non-privileged VFs, only host-view MAD_IFC commands are allowed, and only for SMI LID-Routed GET mads. For privileged VFs, there are no restrictions. This commit does not allow privileged VFs as yet. To determine if a VF is privileged, it calls function mlx4_vf_smi_enabled(). This function returns 0 unconditionally for now. The next two commits allow defining and activating privileged VFs. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 40 ++++++++++++++++----------- drivers/infiniband/hw/mlx4/qp.c | 16 +++++------ drivers/net/ethernet/mellanox/mlx4/cmd.c | 47 ++++++++++++++++++++++---------- include/linux/mlx4/device.h | 1 + 4 files changed, 66 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index fd36ec672632..287ad0564acd 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (!dest_qpt) tun_qp = &tun_ctx->qp[0]; else @@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } /* Class-specific handling */ switch (mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + /* 255 indicates the dom0 */ + if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) { + if (!mlx4_vf_smi_enabled(dev->dev, slave, port)) + return -EPERM; + /* for a VF. drop unsolicited MADs */ + if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) { + mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n", + slave, mad->mad_hdr.mgmt_class, + mad->mad_hdr.method); + return -EINVAL; + } + } + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_demux_sa_handler(ibdev, port, slave, (struct ib_sa_mad *) mad)) @@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (dest_qpt == IB_QPT_SMI) { src_qpnum = 0; sqp = &sqp_ctx->qp[0]; @@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc "belongs to another slave\n", wc->src_qp); return; } - if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "non-master trying to send QP0 packets\n", wc->src_qp); - return; - } /* Map transaction ID */ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, @@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc /* Class-specific handling */ switch (tunnel->mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + if (slave != mlx4_master_func_num(dev->dev) && + !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port)) + return; + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, (struct ib_sa_mad *) &tunnel->mad)) @@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, return -EEXIST; ctx->state = DEMUX_PV_STATE_STARTING; - /* have QP0 only on port owner, and only if link layer is IB */ - if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && - rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + /* have QP0 only if link layer is IB */ + if (rdma_port_get_link_layer(ibdev, ctx->port) == + IB_LINK_LAYER_INFINIBAND) ctx->has_smi = 1; if (ctx->has_smi) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..2e8c58806e2f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2370,7 +2370,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, enum ib_qp_type qpt) + struct ib_send_wr *wr, + enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; struct mlx4_av sqp_av = {0}; @@ -2383,8 +2384,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - /* This function used only for sending on QP1 proxies */ - dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + if (qpt == MLX4_IB_QPT_PROXY_GSI) + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + else + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); /* Use QKEY from the QP context, which is set by master */ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } @@ -2700,16 +2703,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += seglen / 16; break; case MLX4_IB_QPT_PROXY_SMI: - /* don't allow QP0 sends on guests */ - err = -ENOSYS; - *bad_wr = wr; - goto out; case MLX4_IB_QPT_PROXY_GSI: /* If we are tunneling special qps, this is a UD qp. * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; build_tunnel_header(wr, wqe, &seglen); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78099eab7673..b0e48d426fce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -705,20 +705,28 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct ib_smp *smp = inbox->buf; u32 index; u8 port; + u8 opcode_modifier; u16 *table; int err; int vidx, pidx; + int network_view; struct mlx4_priv *priv = mlx4_priv(dev); struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; __be64 slave_node_guid; + port = vhcr->in_modifier; + /* network-view bit is for driver use only, and should not be passed to FW */ + opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */ + network_view = !!(vhcr->op_modifier & 0x8); + if (smp->base_version == 1 && smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->class_version == 1) { - if (smp->method == IB_MGMT_METHOD_GET) { + /* host view is paravirtualized */ + if (!network_view && smp->method == IB_MGMT_METHOD_GET) { if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { index = be32_to_cpu(smp->attr_mod); if (port < 1 || port > dev->caps.num_ports) @@ -743,7 +751,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, /*get the slave specific caps:*/ /*do the command */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); /* modify the response for slaves */ if (!err && slave != mlx4_master_func_num(dev)) { @@ -760,7 +768,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, smp->attr_mod = cpu_to_be32(slave / 8); /* execute cmd */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { /* if needed, move slave gid to index 0 */ @@ -774,7 +782,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { slave_node_guid = mlx4_get_slave_node_guid(dev, slave); @@ -784,19 +792,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } } } + + /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs. + * These are the MADs used by ib verbs (such as ib_query_gids). + */ if (slave != mlx4_master_func_num(dev) && - ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || - (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && - smp->method == IB_MGMT_METHOD_SET))) { - mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " - "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n", - slave, smp->method, smp->mgmt_class, - be16_to_cpu(smp->attr_id)); - return -EPERM; + !mlx4_vf_smi_enabled(dev, slave, port)) { + if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->method == IB_MGMT_METHOD_GET) || network_view) { + mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n", + slave, smp->method, smp->mgmt_class, + network_view ? "Network" : "Host", + be16_to_cpu(smp->attr_id)); + return -EPERM; + } } - /*default:*/ + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } @@ -2537,3 +2550,9 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); + +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) +{ + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..83612faa819c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1234,4 +1234,5 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 99ec41d0a48cb6d14af25765f9449762f9d101f6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:03 +0300 Subject: mlx4: Add infrastructure for selecting VFs to enable QP0 via MLX proxy QPs This commit adds the infrastructure for enabling selected VFs to operate SMI (QP0) MADs without restriction. Additionally, for these enabled VFs, their QP0 proxy and tunnel QPs are MLX QPs. As such, they operate over VL15. Therefore, they are not affected by "credit" problems or changes in the VLArb table (which may shut down VL0). Non-enabled VFs may only create UD proxy QP0 qps (which are forced by the hypervisor to send packets using the q-key it assigns and places in the qp-context). Thus, non-enabled VFs will not pose a security risk. The hypervisor discards any privileged MADs it receives from these non-enabled VFs. By default, all VFs are NOT enabled, and must explicitly be enabled by the administrator. The sysfs interface which operates the VF enablement infrastructure is provided in the next commit. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 52 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlx4/cmd.c | 13 +++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 44 ++++++++++++------ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 16 +++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 8 ++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 26 ++++++++--- include/linux/mlx4/device.h | 1 + 8 files changed, 126 insertions(+), 35 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2e8c58806e2f..b25600997cb6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -608,6 +608,16 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return !attr->srq; } +static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i]) + return !!dev->caps.qp0_qkey[i]; + } + return 0; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) @@ -625,10 +635,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { if (init_attr->qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_PROXY_GSI; - else if (mlx4_is_master(dev->dev)) - qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; - else - qp_type = MLX4_IB_QPT_PROXY_SMI; + else { + if (mlx4_is_master(dev->dev) || + qp0_enabled_vf(dev->dev, sqpn)) + qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_PROXY_SMI; + } } qpn = sqpn; /* add extra sg entry for tunneling */ @@ -643,7 +656,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return -EINVAL; if (tnl_init->proxy_qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_TUN_GSI; - else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) + else if (tnl_init->slave == mlx4_master_func_num(dev->dev) || + mlx4_vf_smi_enabled(dev->dev, tnl_init->slave, + tnl_init->port)) qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; @@ -1930,6 +1945,19 @@ out: return err; } +static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i] || + qpn == dev->caps.qp0_tunnel[i]) { + *qkey = dev->caps.qp0_qkey[i]; + return 0; + } + } + return -EINVAL; +} + static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) @@ -1987,8 +2015,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) - return -EINVAL; + if (mlx4_is_master(mdev->dev)) { + if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } else { + if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); @@ -2682,11 +2715,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { - err = -ENOSYS; - *bad_wr = wr; - goto out; - } err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index b0e48d426fce..26c3ebaa49d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1666,6 +1666,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) for (port = min_port; port <= max_port; port++) { if (!test_bit(port - 1, actv_ports.ports)) continue; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; @@ -1717,6 +1719,8 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave for (port = min_port; port <= max_port; port++) { if (!test_bit(port - 1, actv_ports.ports)) continue; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + MLX4_VF_SMI_DISABLED; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, @@ -2553,6 +2557,13 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { - return 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_oper[slave].smi_enabled[port] == + MLX4_VF_SMI_ENABLED; } EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index ef242e19766f..01e6dd61ee3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -178,8 +178,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - u8 field; - u32 size; + u8 field, port; + u32 size, proxy_qp, qkey; int err = 0; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 @@ -209,6 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 +#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET 0x4 #define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 #define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc @@ -221,6 +222,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10 +#define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 @@ -234,28 +236,35 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, return -EINVAL; vhcr->in_modifier = converted_port; - /* Set nic_info bit to mark new fields support */ - field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); - /* phys-port = logical-port */ field = vhcr->in_modifier - find_first_bit(actv_ports.ports, dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = vhcr->in_modifier; + port = vhcr->in_modifier; + proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; + + /* Set nic_info bit to mark new fields support */ + field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; + + if (mlx4_vf_smi_enabled(dev, slave, port) && + !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) { + field |= QUERY_FUNC_CAP_VF_ENABLE_QP0; + MLX4_PUT(outbox->buf, qkey, + QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + } + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); + /* size is now the QP number */ - size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); - size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); - - size += 2; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY); + proxy_qp += 2; + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY); MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); @@ -326,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field, op_modifier; - u32 size; + u32 size, qkey; int err = 0, quotas = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ @@ -442,6 +451,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } + if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { + MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + func_cap->qp0_qkey = qkey; + } else { + func_cap->qp0_qkey = 0; + } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 6811ee00ba7c..1fce03ebe5c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,6 +134,7 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; + u32 qp0_qkey; u32 qp0_tunnel_qpn; u32 qp0_proxy_qpn; u32 qp1_tunnel_qpn; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12a7ee2e6098..908326876ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -666,13 +666,15 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || + !dev->caps.qp0_qkey) { err = -ENOMEM; goto err_mem; } @@ -684,6 +686,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) " port %d, aborting (%d).\n", i, err); goto err_mem; } + dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; @@ -729,12 +732,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return 0; err_mem: + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + dev->caps.qp0_qkey = NULL; + dev->caps.qp0_tunnel = NULL; + dev->caps.qp0_proxy = NULL; + dev->caps.qp1_tunnel = NULL; + dev->caps.qp1_proxy = NULL; return err; } @@ -1697,6 +1704,7 @@ unmap_bf: unmap_bf_area(dev); if (mlx4_is_slave(dev)) { + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); @@ -2573,6 +2581,7 @@ err_master_mfunc: mlx4_multi_func_cleanup(dev); if (mlx4_is_slave(dev)) { + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); @@ -2702,6 +2711,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..0efd1738fcb3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -133,6 +133,11 @@ enum { MLX4_COMM_CMD_FLR = 254 }; +enum { + MLX4_VF_SMI_DISABLED, + MLX4_VF_SMI_ENABLED +}; + /*The flag indicates that the slave should delay the RESET cmd*/ #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb /*indicates how many retries will be done if we are in the middle of FLR*/ @@ -488,6 +493,7 @@ struct mlx4_vport_state { struct mlx4_vf_admin_state { struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1]; + u8 enable_smi[MLX4_MAX_PORTS + 1]; }; struct mlx4_vport_oper_state { @@ -495,8 +501,10 @@ struct mlx4_vport_oper_state { int mac_idx; int vlan_idx; }; + struct mlx4_vf_oper_state { struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1]; + u8 smi_enabled[MLX4_MAX_PORTS + 1]; }; struct slave_list { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..ad98162a8d79 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2827,10 +2827,12 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, } static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, enum qp_transition transition, u8 slave) { u32 qp_type; + u32 qpn; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; int port; @@ -2870,6 +2872,20 @@ static int verify_qp_parameters(struct mlx4_dev *dev, return -EINVAL; } break; + case MLX4_QP_ST_MLX: + qpn = vhcr->in_modifier & 0x7fffff; + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (transition == QP_TRANS_INIT2RTR && + slave != mlx4_master_func_num(dev) && + mlx4_is_qp_reserved(dev, qpn) && + !mlx4_vf_smi_enabled(dev, slave, port)) { + /* only enabled VFs may create MLX proxy QPs */ + mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n", + __func__, slave, port); + return -EPERM; + } + break; + default: break; } @@ -3454,7 +3470,7 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, qpc, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; @@ -3508,7 +3524,7 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; @@ -3530,7 +3546,7 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; @@ -3567,7 +3583,7 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; @@ -3589,7 +3605,7 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83612faa819c..e2fc7011314c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -401,6 +401,7 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; + u32 *qp0_qkey; u32 *qp0_proxy; u32 *qp1_proxy; u32 *qp0_tunnel; -- cgit v1.2.3 From 65fed8a8c155271cf647651bd62eecb5928ae3a4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:04 +0300 Subject: IB/mlx4: Add interface for selecting VFs to enable QP0 via MLX proxy QPs This commit adds the sysfs interface for enabling QP0 on VFs for selected VF/port. By default, no VFs are enabled for QP0 operation. To enable QP0 operation on a VF/port, under /sys/class/infiniband/mlx4_x/iov//ports/x there are two new entries: - smi_enabled (read-only). Indicates whether smi is currently enabled for the indicated VF/port - enable_smi_admin (rw). Used by the admin to request that smi capability be enabled or disabled for the indicated VF/port. 0 = disable, 1 = enable. The requested enablement will occur at the next reset of the VF (e.g. driver restart on the VM which owns the VF). Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/sysfs.c | 105 ++++++++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/cmd.c | 34 ++++++++++ include/linux/mlx4/device.h | 3 + 3 files changed, 141 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 5a38e43eca65..cb4c66e723b5 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -389,8 +389,10 @@ struct mlx4_port { struct mlx4_ib_dev *dev; struct attribute_group pkey_group; struct attribute_group gid_group; - u8 port_num; + struct device_attribute enable_smi_admin; + struct device_attribute smi_enabled; int slave; + u8 port_num; }; @@ -558,6 +560,101 @@ err: return NULL; } +static ssize_t sysfs_show_smi_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, smi_enabled); + ssize_t len = 0; + + if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_show_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + ssize_t len = 0; + + if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_store_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + int enable; + + if (sscanf(buf, "%i", &enable) != 1 || + enable < 0 || enable > 1) + return -EINVAL; + + if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable)) + return -EINVAL; + return count; +} + +static int add_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + int ret; + + /* do not display entries if eth transport, or if master */ + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return 0; + + sysfs_attr_init(&p->smi_enabled.attr); + p->smi_enabled.show = sysfs_show_smi_enabled; + p->smi_enabled.store = NULL; + p->smi_enabled.attr.name = "smi_enabled"; + p->smi_enabled.attr.mode = 0444; + ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr); + if (ret) { + pr_err("failed to create smi_enabled\n"); + return ret; + } + + sysfs_attr_init(&p->enable_smi_admin.attr); + p->enable_smi_admin.show = sysfs_show_enable_smi_admin; + p->enable_smi_admin.store = sysfs_store_enable_smi_admin; + p->enable_smi_admin.attr.name = "enable_smi_admin"; + p->enable_smi_admin.attr.mode = 0644; + ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr); + if (ret) { + pr_err("failed to create enable_smi_admin\n"); + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + return ret; + } + return 0; +} + +static void remove_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return; + + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr); +} + static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) { struct mlx4_port *p; @@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) if (ret) goto err_free_gid; + ret = add_vf_smi_entries(p); + if (ret) + goto err_free_gid; + list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); return 0; @@ -669,6 +770,7 @@ err_add: mport = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &mport->pkey_group); sysfs_remove_group(p, &mport->gid_group); + remove_vf_smi_entries(mport); kobject_put(p); } kobject_put(dev->dev_ports_parent[slave]); @@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) port = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); + remove_vf_smi_entries(port); kobject_put(p); kobject_put(device->dev_ports_parent[slave]); } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 26c3ebaa49d1..3370ecb8c3d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2567,3 +2567,37 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) MLX4_VF_SMI_ENABLED; } EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); + +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 1; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_admin[slave].enable_smi[port] == + MLX4_VF_SMI_ENABLED; +} +EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin); + +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enabled) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 0; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS || + enabled < 0 || enabled > 1) + return -EINVAL; + + priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e2fc7011314c..dcabe11f37f7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1236,4 +1236,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enable); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From b6f04d3d21458818073a2f5af5339f958864bf71 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 5 May 2014 19:33:23 +0200 Subject: RDMA/cxgb4: Add missing padding at end of struct c4iw_create_cq_resp The i386 ABI disagrees with most other ABIs regarding alignment of data types larger than 4 bytes: on most ABIs a padding must be added at end of the structures, while it is not required on i386. So for most ABI struct c4iw_create_cq_resp gets implicitly padded to be aligned on a 8 bytes multiple, while for i386, such padding is not added. The tool pahole can be used to find such implicit padding: $ pahole --anon_include \ --nested_anon_include \ --recursive \ --class_name c4iw_create_cq_resp \ drivers/infiniband/hw/cxgb4/iw_cxgb4.o Then, structure layout can be compared between i386 and x86_64: +++ obj-i386/drivers/infiniband/hw/cxgb4/iw_cxgb4.o.pahole.txt 2014-03-28 11:43:05.547432195 +0100 --- obj-x86_64/drivers/infiniband/hw/cxgb4/iw_cxgb4.o.pahole.txt 2014-03-28 10:55:10.990133017 +0100 @@ -14,9 +13,8 @@ struct c4iw_create_cq_resp { __u32 size; /* 28 4 */ __u32 qid_mask; /* 32 4 */ - /* size: 36, cachelines: 1, members: 6 */ - /* last cacheline: 36 bytes */ + /* size: 40, cachelines: 1, members: 6 */ + /* padding: 4 */ + /* last cacheline: 40 bytes */ }; This ABI disagreement will make an x86_64 kernel try to write past the buffer provided by an i386 binary. When boundary check will be implemented, the x86_64 kernel will refuse to write past the i386 userspace provided buffer and the uverbs will fail. If the structure is on a page boundary and the next page is not mapped, ib_copy_to_udata() will fail and the uverb will fail. This patch adds an explicit padding at end of structure c4iw_create_cq_resp, and, like 92b0ca7cb149 ("IB/mlx5: Fix stack info leak in mlx5_ib_alloc_ucontext()"), makes function c4iw_create_cq() not writting this padding field to userspace. This way, x86_64 kernel will be able to write struct c4iw_create_cq_resp as expected by unpatched and patched i386 libcxgb4. Link: http://marc.info/?i=cover.1399309513.git.ydroneaud@opteya.com Cc: Fixes: cfdda9d764362 ("RDMA/cxgb4: Add driver for Chelsio T4 RNIC") Fixes: e24a72a3302a6 ("RDMA/cxgb4: Fix four byte info leak in c4iw_create_cq()") Cc: Dan Carpenter Signed-off-by: Yann Droneaud Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 4 ++-- drivers/infiniband/hw/cxgb4/user.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index cfaa56ada189..7151a02b4ebb 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -940,7 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (!mm2) goto err4; - memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -951,7 +950,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); if (ret) goto err5; diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 11ccd276e5d9..9b7534b5f07d 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -48,6 +48,7 @@ struct c4iw_create_cq_resp { __u32 cqid; __u32 size; __u32 qid_mask; + __u32 reserved; /* explicit padding (optional for i386) */ }; -- cgit v1.2.3 From 729ee4efcc29c040a1729c058f03fae3cebc6035 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Thu, 27 Mar 2014 12:10:33 +0100 Subject: IB: Allow build of hw/ and ulp/ subdirectories independently It is not possible to build only the drivers/infiniband/hw/ (or ulp/) subdirectory with command such as: $ make ARCH=x86_64 O=./obj-x86_64/ drivers/infiniband/hw/ This fails with following error messages: make[2]: Nothing to be done for `all'. make[2]: Nothing to be done for `relocs'. CHK include/config/kernel.release Using /home/ydroneaud/src/linux as source for kernel GEN /home/ydroneaud/src/linux/obj-x86_64/Makefile CHK include/generated/uapi/linux/version.h CHK include/generated/utsrelease.h CALL /home/ydroneaud/src/linux/scripts/checksyscalls.sh /home/ydroneaud/src/linux/scripts/Makefile.build:44: /home/ydroneaud/src/linux/drivers/infiniband/hw/Makefile: No such file or directory make[2]: *** No rule to make target `/home/ydroneaud/src/linux/drivers/infiniband/hw/Makefile'. Stop. make[1]: *** [drivers/infiniband/hw/] Error 2 make: *** [sub-make] Error 2 This patch creates a Makefile in hw/ and ulp/ and moves each corresponding parts of drivers/infiniband/Makefile in the new Makefiles. It should not break build except if some hw/ drivers or ulp/ were allowed previously to be built while CONFIG_INFINIBAND is set to 'n', but according to drivers/infiniband/Kconfig, it's not possible. So it should be safe to apply. Signed-off-by: Yann Droneaud Reviewed-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/Makefile | 19 ++----------------- drivers/infiniband/hw/Makefile | 12 ++++++++++++ drivers/infiniband/ulp/Makefile | 5 +++++ 3 files changed, 19 insertions(+), 17 deletions(-) create mode 100644 drivers/infiniband/hw/Makefile create mode 100644 drivers/infiniband/ulp/Makefile (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index bf508b5550c4..dc21836b5a8d 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,18 +1,3 @@ obj-$(CONFIG_INFINIBAND) += core/ -obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ -obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/ -obj-$(CONFIG_INFINIBAND_QIB) += hw/qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ -obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ -obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ -obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/ -obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ -obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/ -obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ -obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/ -obj-$(CONFIG_INFINIBAND_USNIC) += hw/usnic/ -obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ -obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ -obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/ -obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ -obj-$(CONFIG_INFINIBAND_ISERT) += ulp/isert/ +obj-$(CONFIG_INFINIBAND) += hw/ +obj-$(CONFIG_INFINIBAND) += ulp/ diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile new file mode 100644 index 000000000000..e900b03531a9 --- /dev/null +++ b/drivers/infiniband/hw/Makefile @@ -0,0 +1,12 @@ +obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ +obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ +obj-$(CONFIG_INFINIBAND_QIB) += qib/ +obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ +obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ +obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ +obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ +obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ +obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ +obj-$(CONFIG_INFINIBAND_NES) += nes/ +obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ +obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile new file mode 100644 index 000000000000..f3c7dcf03098 --- /dev/null +++ b/drivers/infiniband/ulp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_INFINIBAND_IPOIB) += ipoib/ +obj-$(CONFIG_INFINIBAND_SRP) += srp/ +obj-$(CONFIG_INFINIBAND_SRPT) += srpt/ +obj-$(CONFIG_INFINIBAND_ISER) += iser/ +obj-$(CONFIG_INFINIBAND_ISERT) += isert/ -- cgit v1.2.3 From 60093dc0c8b6407bc7494cbcb3e84322cc6782c8 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 May 2014 15:15:10 +0300 Subject: IB: Return error for unsupported QP creation flags Fix the usnic and thw qib drivers to err when QP creation flags that they don't understand are provided. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_qp.c | 3 ++- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 0cad0c40d742..7fcc150d603c 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp *ret; if (init_attr->cap.max_send_sge > ib_qib_max_sges || - init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) { + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || + init_attr->create_flags) { ret = ERR_PTR(-EINVAL); goto bail; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index d48d2c0a2e3c..53bd6a2d9cdb 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -466,6 +466,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, ucontext = to_uucontext(pd->uobject->context); us_ibdev = to_usdev(pd->device); + if (init_attr->create_flags) + return ERR_PTR(-EINVAL); + err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { usnic_err("%s: cannot copy udata for create_qp\n", -- cgit v1.2.3 From 40f2287bd583f4df4c602c1a29a48df2730fb6d4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sun, 11 May 2014 15:15:12 +0300 Subject: IB/mlx4: Implement IB_QP_CREATE_USE_GFP_NOIO Modify the various routines used to allocate memory resources which serve QPs in mlx4 to get an input GFP directive. Have the Ethernet driver to use GFP_KERNEL in it's QP allocations as done prior to this commit, and the IB driver to use GFP_NOIO when the IB verbs IB_QP_CREATE_USE_GFP_NOIO QP creation flag is provided. Signed-off-by: Mel Gorman Signed-off-by: Jiri Kosina Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 6 ++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/qp.c | 30 ++++++++++++---------- drivers/infiniband/hw/mlx4/srq.c | 7 ++--- drivers/net/ethernet/mellanox/mlx4/alloc.c | 27 +++++++++---------- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 ++--- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++--- drivers/net/ethernet/mellanox/mlx4/icm.h | 3 ++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 +-- drivers/net/ethernet/mellanox/mlx4/mr.c | 17 ++++++------ drivers/net/ethernet/mellanox/mlx4/qp.c | 20 +++++++-------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/srq.c | 4 +-- include/linux/mlx4/device.h | 10 +++++--- 16 files changed, 82 insertions(+), 70 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5f640814cc81..1066eec854a9 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf); + PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); if (err) goto err_mtt; @@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1); + err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); if (err) goto err_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index f589522fddfd..bb8c9dd442ae 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, + MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..8710baf60bb9 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -610,7 +610,8 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, + gfp_t gfp) { int qpn; int err; @@ -748,14 +749,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0); + err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); if (err) goto err; *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) { err = -ENOMEM; goto err_db; } @@ -765,13 +766,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); - + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -801,7 +801,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -1040,7 +1040,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp = NULL; int err; u16 xrcdn = 0; + gfp_t gfp; + gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? + GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1049,7 +1052,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_NETIF)) + MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1059,7 +1063,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (init_attr->create_flags && (udata || - ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && init_attr->qp_type != IB_QPT_UD) || ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) @@ -1079,7 +1083,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, gfp); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; @@ -1088,7 +1092,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + udata, 0, &qp, gfp); if (err) return ERR_PTR(err); @@ -1106,7 +1110,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, get_sqp_num(to_mdev(pd->device), init_attr), - &qp); + &qp, gfp); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 60c5fb025fc7..62d9285300af 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, + GFP_KERNEL)) { err = -ENOMEM; goto err_db; } @@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index c3ad464d0627..b0297da50304 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -171,7 +171,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf) + struct mlx4_buf *buf, gfp_t gfp) { dma_addr_t t; @@ -180,7 +180,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); + size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -200,14 +200,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); + gfp); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); + &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -218,7 +218,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, if (BITS_PER_LONG == 64) { struct page **pages; - pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); + pages = kmalloc(sizeof *pages * buf->nbufs, gfp); if (!pages) goto err_free; for (i = 0; i < buf->nbufs; ++i) @@ -260,11 +260,12 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) } EXPORT_SYMBOL_GPL(mlx4_buf_free); -static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, + gfp_t gfp) { struct mlx4_db_pgdir *pgdir; - pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); + pgdir = kzalloc(sizeof *pgdir, gfp); if (!pgdir) return NULL; @@ -272,7 +273,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, GFP_KERNEL); + &pgdir->db_dma, gfp); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -312,7 +313,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; @@ -324,7 +325,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev)); + pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -376,13 +377,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, { int err; - err = mlx4_db_alloc(dev, &wqres->db, 1); + err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL); if (err) return err; *wqres->db.db = 0; - err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf); + err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL); if (err) goto err_db; @@ -391,7 +392,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 0487121e4a0f..c90cde5b4aee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -173,11 +173,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) if (*cqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &cq_table->table, *cqn); + err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL); if (err) goto err_out; - err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ba049ae88749..87857a6463eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -972,7 +972,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, if (!context) return -ENOMEM; - err = mlx4_qp_alloc(mdev->dev, qpn, qp); + err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; @@ -1012,7 +1012,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) en_err(priv, "Failed reserving drop qpn\n"); return err; } - err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); @@ -1071,7 +1071,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index dd1f6d346459..bc0cc1eb214d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -113,7 +113,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); ring->qpn = qpn; - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_map; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 5fbf4924c272..eb1747e1937d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -245,7 +245,8 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, + int gfp) { u32 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); @@ -259,7 +260,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) } table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + (table->lowmem ? gfp : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; @@ -356,7 +357,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 i; for (i = start; i <= end; i += inc) { - err = mlx4_table_get(dev, table, i); + err = mlx4_table_get(dev, table, i, GFP_KERNEL); if (err) goto fail; } diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index dee67fa39107..067e6e0af36c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -71,7 +71,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask, int coherent); void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, + int gfp); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..627a54ef2955 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -888,7 +888,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); @@ -896,7 +896,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 24835853b753..4c71dafad217 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -364,14 +364,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) __mlx4_mpt_release(dev, index); } -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; - return mlx4_table_get(dev, &mr_table->dmpt_table, index); + return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp); } -static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) { u64 param = 0; @@ -382,7 +382,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mpt_alloc_icm(dev, index); + return __mlx4_mpt_alloc_icm(dev, index, gfp); } void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) @@ -469,7 +469,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL); if (err) return err; @@ -627,13 +627,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf) + struct mlx4_buf *buf, gfp_t gfp) { u64 *page_list; int err; int i; - page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL); + page_list = kmalloc(buf->npages * sizeof *page_list, + gfp); if (!page_list) return -ENOMEM; @@ -680,7 +681,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 61d64ebffd56..917f0d0ba7c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -272,29 +272,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - err = mlx4_table_get(dev, &qp_table->qp_table, qpn); + err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp); if (err) goto err_out; - err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp); if (err) goto err_put_qp; - err = mlx4_table_get(dev, &qp_table->altc_table, qpn); + err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp); if (err) goto err_put_auxc; - err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp); if (err) goto err_put_altc; - err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp); if (err) goto err_put_rdmarc; @@ -316,7 +316,7 @@ err_out: return err; } -static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, int gfp) { u64 param = 0; @@ -326,7 +326,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_qp_alloc_icm(dev, qpn); + return __mlx4_qp_alloc_icm(dev, qpn, gfp); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) @@ -355,7 +355,7 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) __mlx4_qp_free_icm(dev, qpn); } -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; @@ -366,7 +366,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) qp->qpn = qpn; - err = mlx4_qp_alloc_icm(dev, qpn); + err = mlx4_qp_alloc_icm(dev, qpn, gfp); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..45da913b5679 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1532,7 +1532,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; if (!fw_reserved(dev, qpn)) { - err = __mlx4_qp_alloc_icm(dev, qpn); + err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; @@ -1619,7 +1619,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mpt_alloc_icm(dev, mpt->key); + err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 98faf870b0b0..67146624eb58 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -103,11 +103,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) if (*srqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &srq_table->table, *srqn); + err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL); if (err) goto err_out; - err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL); if (err) goto err_put; return 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..be60b002bb37 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -837,7 +837,7 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev) } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf); + struct mlx4_buf *buf, gfp_t gfp); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { @@ -874,9 +874,10 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf); + struct mlx4_buf *buf, gfp_t gfp); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, + gfp_t gfp); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -892,7 +893,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, + gfp_t gfp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, -- cgit v1.2.3 From b7dfa8895f64ffa371d0ed09c1d1ba8c6e19b956 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 5 May 2014 19:35:26 +0200 Subject: RDMA/cxgb4: add missing padding at end of struct c4iw_alloc_ucontext_resp The i386 ABI disagrees with most other ABIs regarding alignment of data types larger than 4 bytes: on most ABIs a padding must be added at end of the structures, while it is not required on i386. So for most ABI struct c4iw_alloc_ucontext_resp gets implicitly padded to be aligned on a 8 bytes multiple, while for i386, such padding is not added. The tool pahole can be used to find such implicit padding: $ pahole --anon_include \ --nested_anon_include \ --recursive \ --class_name c4iw_alloc_ucontext_resp \ drivers/infiniband/hw/cxgb4/iw_cxgb4.o Then, structure layout can be compared between i386 and x86_64: +++ obj-i386/drivers/infiniband/hw/cxgb4/iw_cxgb4.o.pahole.txt 2014-03-28 11:43:05.547432195 +0100 --- obj-x86_64/drivers/infiniband/hw/cxgb4/iw_cxgb4.o.pahole.txt 2014-03-28 10:55:10.990133017 +0100 @@ -2,9 +2,8 @@ struct c4iw_alloc_ucontext_resp { __u64 status_page_key; /* 0 8 */ __u32 status_page_size; /* 8 4 */ - /* size: 12, cachelines: 1, members: 2 */ - /* last cacheline: 12 bytes */ + /* size: 16, cachelines: 1, members: 2 */ + /* padding: 4 */ + /* last cacheline: 16 bytes */ }; This ABI disagreement will make an x86_64 kernel try to write past the buffer provided by an i386 binary. When boundary check will be implemented, the x86_64 kernel will refuse to write past the i386 userspace provided buffer and the uverbs will fail. If the structure is on a page boundary and the next page is not mapped, ib_copy_to_udata() will fail and the uverb will fail. Additionally, as reported by Dan Carpenter, without the implicit padding being properly cleared, an information leak would take place in most architectures. This patch adds an explicit padding to struct c4iw_alloc_ucontext_resp, and, like 92b0ca7cb149 ("IB/mlx5: Fix stack info leak in mlx5_ib_alloc_ucontext()"), makes function c4iw_alloc_ucontext() not writting this padding field to userspace. This way, x86_64 kernel will be able to write struct c4iw_alloc_ucontext_resp as expected by unpatched and patched i386 libcxgb4. Link: http://marc.info/?i=cover.1399309513.git.ydroneaud@opteya.com Link: http://marc.info/?i=1395848977.3297.15.camel@localhost.localdomain Link: http://marc.info/?i=20140328082428.GH25192@mwanda Cc: Fixes: 05eb23893c2c ("cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes") Reported-by: Yann Droneaud Reported-by: Dan Carpenter Signed-off-by: Yann Droneaud Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/provider.c | 5 +++-- drivers/infiniband/hw/cxgb4/user.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index a94a3e12c349..c777e22bd8d5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -122,7 +122,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); - if (udata->outlen < sizeof(uresp)) { + if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { if (!warned++) pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled."); rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; @@ -140,7 +140,8 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, context->key += PAGE_SIZE; spin_unlock(&context->mmap_lock); - ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); if (ret) goto err_mm; diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 9b7534b5f07d..cbd0ce170728 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -75,5 +75,6 @@ struct c4iw_create_qp_resp { struct c4iw_alloc_ucontext_resp { __u64 status_page_key; __u32 status_page_size; + __u32 reserved; /* explicit padding (optional for i386) */ }; #endif -- cgit v1.2.3 From 6fcd8d0d93fb2f38807371bb144f3f869d4ca5a2 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 9 Jun 2014 16:36:33 +0200 Subject: IB/mlx4: Fix gfp passing in create_qp_common() There are two kzalloc() calls which were not converted to use value of gfp passed to create_qp_common() instead of using hardcoded GFP_KERNEL in 40f2287bd583 ("IB/mlx4: Implement IB_QP_CREATE_USE_GFP_NOIO"). Fix this by passing gfp value down properly. Reported-by: Dan Carpenter Signed-off-by: Jiri Kosina Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8710baf60bb9..a0bfda275647 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -659,14 +659,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL); + sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); + qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); if (!qp) return -ENOMEM; qp->pri.vid = 0xFFFF; -- cgit v1.2.3 From 5647263cb136a2795b67b9ce46f2debb653e3373 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 26 Mar 2014 17:07:57 -0500 Subject: RDMA/nes: Add support for iWARP Port Mapper user space service Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 25 ++- drivers/infiniband/hw/nes/nes.h | 3 + drivers/infiniband/hw/nes/nes_cm.c | 320 ++++++++++++++++++++++++++++++------- drivers/infiniband/hw/nes/nes_cm.h | 12 +- 4 files changed, 296 insertions(+), 64 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 353c7b05a90a..3b2a6dc8ea99 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -68,7 +68,6 @@ MODULE_VERSION(DRV_VERSION); int max_mtu = 9000; int interrupt_mod_interval = 0; - /* Interoperability */ int mpa_version = 1; module_param(mpa_version, int, 0644); @@ -112,6 +111,16 @@ static struct pci_device_id nes_pci_table[] = { MODULE_DEVICE_TABLE(pci, nes_pci_table); +/* registered nes netlink callbacks */ +static struct ibnl_client_cbs nes_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); static int nes_net_event(struct notifier_block *, unsigned long, void *); static int nes_notifiers_registered; @@ -672,6 +681,17 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) } nes_notifiers_registered++; + if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table)) + printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n", + __func__, __LINE__); + + ret = iwpm_init(RDMA_NL_NES); + if (ret) { + printk(KERN_ERR PFX "%s: port mapper initialization failed\n", + pci_name(pcidev)); + goto bail7; + } + INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); /* Initialize network devices */ @@ -710,6 +730,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", nesdev->netdev_count, nesdev->nesadapter->netdev_count); + ibnl_remove_client(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { @@ -773,6 +794,8 @@ static void nes_remove(struct pci_dev *pcidev) nesdev->nesadapter->netdev_count--; } } + ibnl_remove_client(RDMA_NL_NES); + iwpm_exit(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 33cc58941a3e..bd9d132f11c7 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -51,6 +51,8 @@ #include #include #include +#include +#include #define NES_SEND_FIRST_WRITE @@ -130,6 +132,7 @@ #define NES_DBG_IW_TX 0x00040000 #define NES_DBG_SHUTDOWN 0x00080000 #define NES_DBG_PAU 0x00100000 +#define NES_DBG_NLMSG 0x00200000 #define NES_DBG_RSVD1 0x10000000 #define NES_DBG_RSVD2 0x20000000 #define NES_DBG_RSVD3 0x40000000 diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index dfa9df484505..6f09a72e78d7 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -59,6 +59,7 @@ #include #include #include +#include #include "nes.h" @@ -166,7 +167,6 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) { return rem_ref_cm_node(cm_node->cm_core, cm_node); } - /** * create_event */ @@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb, iph->ttl = 0x40; iph->protocol = 0x06; /* IPPROTO_TCP */ - iph->saddr = htonl(cm_node->loc_addr); - iph->daddr = htonl(cm_node->rem_addr); + iph->saddr = htonl(cm_node->mapped_loc_addr); + iph->daddr = htonl(cm_node->mapped_rem_addr); - tcph->source = htons(cm_node->loc_port); - tcph->dest = htons(cm_node->rem_port); + tcph->source = htons(cm_node->mapped_loc_port); + tcph->dest = htons(cm_node->mapped_rem_port); tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { @@ -525,6 +525,100 @@ static void form_cm_frame(struct sk_buff *skb, cm_packets_created++; } +/* + * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct + */ +static void nes_create_sockaddr(__be32 ip_addr, __be16 port, + struct sockaddr_storage *addr) +{ + struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr; + nes_sockaddr->sin_family = AF_INET; + memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32)); + nes_sockaddr->sin_port = port; +} + +/* + * nes_create_mapinfo - Create a mapinfo object in the port mapper data base + */ +static int nes_create_mapinfo(struct nes_cm_info *cm_info) +{ + struct sockaddr_storage local_sockaddr; + struct sockaddr_storage mapped_sockaddr; + + nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), + &local_sockaddr); + nes_create_sockaddr(htonl(cm_info->mapped_loc_addr), + htons(cm_info->mapped_loc_port), &mapped_sockaddr); + + return iwpm_create_mapinfo(&local_sockaddr, + &mapped_sockaddr, RDMA_NL_NES); +} + +/* + * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base + * and send a remove mapping op message to + * the userspace port mapper + */ +static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port, + u32 mapped_loc_addr, u16 mapped_loc_port) +{ + struct sockaddr_storage local_sockaddr; + struct sockaddr_storage mapped_sockaddr; + + nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr); + nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port), + &mapped_sockaddr); + + iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr); + return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES); +} + +/* + * nes_form_pm_msg - Form a port mapper message with mapping info + */ +static void nes_form_pm_msg(struct nes_cm_info *cm_info, + struct iwpm_sa_data *pm_msg) +{ + nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), + &pm_msg->loc_addr); + nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port), + &pm_msg->rem_addr); +} + +/* + * nes_form_reg_msg - Form a port mapper message with dev info + */ +static void nes_form_reg_msg(struct nes_vnic *nesvnic, + struct iwpm_dev_data *pm_msg) +{ + memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name, + IWPM_DEVNAME_SIZE); + memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); +} + +/* + * nes_record_pm_msg - Save the received mapping info + */ +static void nes_record_pm_msg(struct nes_cm_info *cm_info, + struct iwpm_sa_data *pm_msg) +{ + struct sockaddr_in *mapped_loc_addr = + (struct sockaddr_in *)&pm_msg->mapped_loc_addr; + struct sockaddr_in *mapped_rem_addr = + (struct sockaddr_in *)&pm_msg->mapped_rem_addr; + + if (mapped_loc_addr->sin_family == AF_INET) { + cm_info->mapped_loc_addr = + ntohl(mapped_loc_addr->sin_addr.s_addr); + cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port); + } + if (mapped_rem_addr->sin_family == AF_INET) { + cm_info->mapped_rem_addr = + ntohl(mapped_rem_addr->sin_addr.s_addr); + cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port); + } +} + /** * print_core - dump a cm core */ @@ -1147,8 +1241,11 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, loc_addr, loc_port, cm_node->rem_addr, cm_node->rem_port, rem_addr, rem_port); - if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && - (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { + if ((cm_node->mapped_loc_addr == loc_addr) && + (cm_node->mapped_loc_port == loc_port) && + (cm_node->mapped_rem_addr == rem_addr) && + (cm_node->mapped_rem_port == rem_port)) { + add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); return cm_node; @@ -1165,18 +1262,28 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, * find_listener - find a cm node listening on this addr-port pair */ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, - nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) + nes_addr_t dst_addr, u16 dst_port, + enum nes_cm_listener_state listener_state, int local) { unsigned long flags; struct nes_cm_listener *listen_node; + nes_addr_t listen_addr; + u16 listen_port; /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { + if (local) { + listen_addr = listen_node->loc_addr; + listen_port = listen_node->loc_port; + } else { + listen_addr = listen_node->mapped_loc_addr; + listen_port = listen_node->mapped_loc_port; + } /* compare node pair, return node handle if a match */ - if (((listen_node->loc_addr == dst_addr) || - listen_node->loc_addr == 0x00000000) && - (listen_node->loc_port == dst_port) && + if (((listen_addr == dst_addr) || + listen_addr == 0x00000000) && + (listen_port == dst_port) && (listener_state & listen_node->listener_state)) { atomic_inc(&listen_node->ref_count); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); @@ -1189,7 +1296,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, return NULL; } - /** * add_hte_node - add a cm node to the hash table */ @@ -1310,9 +1416,20 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - if (listener->nesvnic) - nes_manage_apbvt(listener->nesvnic, listener->loc_port, - PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + if (listener->nesvnic) { + nes_manage_apbvt(listener->nesvnic, + listener->mapped_loc_port, + PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + + nes_remove_mapinfo(listener->loc_addr, + listener->loc_port, + listener->mapped_loc_addr, + listener->mapped_loc_port); + nes_debug(NES_DBG_NLMSG, + "Delete APBVT mapped_loc_port = %04X\n", + listener->mapped_loc_port); + } nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); @@ -1454,6 +1571,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; + cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; + cm_node->mapped_rem_addr = cm_info->mapped_rem_addr; + cm_node->mapped_loc_port = cm_info->mapped_loc_port; + cm_node->mapped_rem_port = cm_info->mapped_rem_port; + cm_node->mpa_frame_rev = mpa_version; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; cm_node->mpav2_ird_ord = 0; @@ -1500,8 +1622,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loopbackpartner = NULL; /* get the mac addr for the remote node */ - oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); - arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex); + oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr, + NULL, NES_ARP_RESOLVE); + arpindex = nes_addr_resolve_neigh(nesvnic, + cm_node->mapped_rem_addr, oldarpindex); if (arpindex < 0) { kfree(cm_node); return NULL; @@ -1563,11 +1687,14 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); } else { if (cm_node->apbvt_set && cm_node->nesvnic) { - nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC( - cm_node->nesvnic->nesdev->pcidev->devfn), + nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port, + PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); } + nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n", + cm_node->mapped_loc_port); + nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port, + cm_node->mapped_loc_addr, cm_node->mapped_loc_port); } atomic_dec(&cm_core->node_cnt); @@ -2235,17 +2362,21 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, * mini_cm_listen - create a listen node with params */ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; unsigned long flags; + int iwpm_err = 0; nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", cm_info->loc_addr, cm_info->loc_port); /* cannot have multiple matching listeners */ - listener = find_listener(cm_core, htonl(cm_info->loc_addr), - htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); + listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port, + NES_CM_LISTENER_EITHER_STATE, 1); + if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { /* find automatically incs ref count ??? */ atomic_dec(&listener->ref_count); @@ -2254,6 +2385,22 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, } if (!listener) { + nes_form_reg_msg(nesvnic, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); + if (iwpm_err) { + nes_debug(NES_DBG_NLMSG, + "Port Mapper reg pid fail (err = %d).\n", iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + nes_form_pm_msg(cm_info, &pm_msg); + iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES); + if (iwpm_err) + nes_debug(NES_DBG_NLMSG, + "Port Mapper query fail (err = %d).\n", iwpm_err); + else + nes_record_pm_msg(cm_info, &pm_msg); + } + /* create a CM listen node (1/2 node to compare incoming traffic to) */ listener = kzalloc(sizeof(*listener), GFP_ATOMIC); if (!listener) { @@ -2261,8 +2408,10 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, return NULL; } - listener->loc_addr = htonl(cm_info->loc_addr); - listener->loc_port = htons(cm_info->loc_port); + listener->loc_addr = cm_info->loc_addr; + listener->loc_port = cm_info->loc_port; + listener->mapped_loc_addr = cm_info->mapped_loc_addr; + listener->mapped_loc_port = cm_info->mapped_loc_port; listener->reused_node = 0; atomic_set(&listener->ref_count, 1); @@ -2324,14 +2473,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (cm_info->loc_addr == cm_info->rem_addr) { loopbackremotelistener = find_listener(cm_core, - ntohl(nesvnic->local_ipaddr), cm_node->rem_port, - NES_CM_LISTENER_ACTIVE_STATE); + cm_node->mapped_loc_addr, cm_node->mapped_rem_port, + NES_CM_LISTENER_ACTIVE_STATE, 0); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; + loopback_cm_info.mapped_loc_port = + cm_info->mapped_rem_port; + loopback_cm_info.mapped_rem_port = + cm_info->mapped_loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, loopbackremotelistener); @@ -2560,6 +2713,12 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, nfo.rem_addr = ntohl(iph->saddr); nfo.rem_port = ntohs(tcph->source); + /* If port mapper is available these should be mapped address info */ + nfo.mapped_loc_addr = ntohl(iph->daddr); + nfo.mapped_loc_port = ntohs(tcph->dest); + nfo.mapped_rem_addr = ntohl(iph->saddr); + nfo.mapped_rem_port = ntohs(tcph->source); + tmp_daddr = cpu_to_be32(iph->daddr); tmp_saddr = cpu_to_be32(iph->saddr); @@ -2568,8 +2727,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, do { cm_node = find_node(cm_core, - nfo.rem_port, nfo.rem_addr, - nfo.loc_port, nfo.loc_addr); + nfo.mapped_rem_port, nfo.mapped_rem_addr, + nfo.mapped_loc_port, nfo.mapped_loc_addr); if (!cm_node) { /* Only type of packet accepted are for */ @@ -2578,9 +2737,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, skb_handled = 0; break; } - listener = find_listener(cm_core, nfo.loc_addr, - nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); + listener = find_listener(cm_core, nfo.mapped_loc_addr, + nfo.mapped_loc_port, + NES_CM_LISTENER_ACTIVE_STATE, 0); if (!listener) { nfo.cm_id = NULL; nfo.conn_type = 0; @@ -3184,10 +3343,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_cm_init_tsa_conn(nesqp, cm_node); - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(cm_node->mapped_loc_port); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(cm_node->mapped_rem_port); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr)); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << @@ -3211,9 +3372,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = raddr->sin_addr.s_addr; - nes_quad.TcpPorts[0] = raddr->sin_port; - nes_quad.TcpPorts[1] = laddr->sin_port; + nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); @@ -3315,6 +3476,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int apbvt_set = 0; struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int iwpm_err = 0; if (cm_id->remote_addr.ss_family != AF_INET) return -ENOSYS; @@ -3352,20 +3516,44 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); + /* set up the connection params for the node */ + cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr); + cm_info.loc_port = ntohs(laddr->sin_port); + cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr); + cm_info.rem_port = ntohs(raddr->sin_port); + cm_info.cm_id = cm_id; + cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + + /* No port mapper available, go with the specified peer information */ + cm_info.mapped_loc_addr = cm_info.loc_addr; + cm_info.mapped_loc_port = cm_info.loc_port; + cm_info.mapped_rem_addr = cm_info.rem_addr; + cm_info.mapped_rem_port = cm_info.rem_port; + + nes_form_reg_msg(nesvnic, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); + if (iwpm_err) { + nes_debug(NES_DBG_NLMSG, + "Port Mapper reg pid fail (err = %d).\n", iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + nes_form_pm_msg(&cm_info, &pm_msg); + iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES); + if (iwpm_err) + nes_debug(NES_DBG_NLMSG, + "Port Mapper query fail (err = %d).\n", iwpm_err); + else + nes_record_pm_msg(&cm_info, &pm_msg); + } + if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) { - nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port), - PCI_FUNC(nesdev->pcidev->devfn), - NES_MANAGE_APBVT_ADD); + nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); apbvt_set = 1; } - /* set up the connection params for the node */ - cm_info.loc_addr = htonl(laddr->sin_addr.s_addr); - cm_info.loc_port = htons(laddr->sin_port); - cm_info.rem_addr = htonl(raddr->sin_addr.s_addr); - cm_info.rem_port = htons(raddr->sin_port); - cm_info.cm_id = cm_id; - cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + if (nes_create_mapinfo(&cm_info)) + return -ENOMEM; cm_id->add_ref(cm_id); @@ -3375,10 +3563,14 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) &cm_info); if (!cm_node) { if (apbvt_set) - nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port), + nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n", + cm_info.mapped_loc_port); + nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port, + cm_info.mapped_loc_addr, cm_info.mapped_loc_port); cm_id->rem_ref(cm_id); return -ENOMEM; } @@ -3424,13 +3616,16 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) nesvnic->local_ipaddr, laddr->sin_addr.s_addr); /* setup listen params in our api call struct */ - cm_info.loc_addr = nesvnic->local_ipaddr; - cm_info.loc_port = laddr->sin_port; + cm_info.loc_addr = ntohl(nesvnic->local_ipaddr); + cm_info.loc_port = ntohs(laddr->sin_port); cm_info.backlog = backlog; cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + /* No port mapper available, go with the specified info */ + cm_info.mapped_loc_addr = cm_info.loc_addr; + cm_info.mapped_loc_port = cm_info.loc_port; cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { @@ -3442,7 +3637,10 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_node; if (!cm_node->reused_node) { - err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port), + if (nes_create_mapinfo(&cm_info)) + return -ENOMEM; + + err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port, PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); if (err) { @@ -3567,9 +3765,11 @@ static void cm_event_connected(struct nes_cm_event *event) nes_cm_init_tsa_conn(nesqp, cm_node); /* set the QP tsa context */ - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(cm_node->mapped_loc_port); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(cm_node->mapped_rem_port); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << @@ -3599,9 +3799,9 @@ static void cm_event_connected(struct nes_cm_event *event) nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = raddr->sin_addr.s_addr; - nes_quad.TcpPorts[0] = raddr->sin_port; - nes_quad.TcpPorts[1] = laddr->sin_port; + nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); @@ -3629,7 +3829,7 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.ird = cm_node->ird_size; cm_event.ord = cm_node->ord_size; - cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr; + cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 522c99cd07c4..f522cf639789 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -293,8 +293,8 @@ struct nes_cm_listener { struct list_head list; struct nes_cm_core *cm_core; u8 loc_mac[ETH_ALEN]; - nes_addr_t loc_addr; - u16 loc_port; + nes_addr_t loc_addr, mapped_loc_addr; + u16 loc_port, mapped_loc_port; struct iw_cm_id *cm_id; enum nes_cm_conn_type conn_type; atomic_t ref_count; @@ -308,7 +308,9 @@ struct nes_cm_listener { /* per connection node and node state information */ struct nes_cm_node { nes_addr_t loc_addr, rem_addr; + nes_addr_t mapped_loc_addr, mapped_rem_addr; u16 loc_port, rem_port; + u16 mapped_loc_port, mapped_rem_port; u8 loc_mac[ETH_ALEN]; u8 rem_mac[ETH_ALEN]; @@ -364,6 +366,10 @@ struct nes_cm_info { u16 rem_port; nes_addr_t loc_addr; nes_addr_t rem_addr; + u16 mapped_loc_port; + u16 mapped_rem_port; + nes_addr_t mapped_loc_addr; + nes_addr_t mapped_rem_addr; enum nes_cm_conn_type conn_type; int backlog; -- cgit v1.2.3 From 9eccfe109b276fddf2908d1a70f7f4449b92f08f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 26 Mar 2014 17:08:09 -0500 Subject: RDMA/cxgb4: Add support for iWARP Port Mapper user space service Based on original work by Vipul Pandya. Signed-off-by: Steve Wise [ Fix htons -> ntohs to make sparse happy. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 180 +++++++++++++++++++++++++++------ drivers/infiniband/hw/cxgb4/device.c | 81 ++++++++++++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 44 ++++++++ 3 files changed, 262 insertions(+), 43 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1f863a96a480..7de6c9fcf883 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -294,6 +294,12 @@ void _c4iw_free_ep(struct kref *kref) dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); } + if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) { + print_addr(&ep->com, __func__, "remove_mapinfo/mapping"); + iwpm_remove_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr); + iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); + } kfree(ep); } @@ -528,6 +534,38 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } +/* + * c4iw_form_pm_msg - Form a port mapper message with mapping info + */ +static void c4iw_form_pm_msg(struct c4iw_ep *ep, + struct iwpm_sa_data *pm_msg) +{ + memcpy(&pm_msg->loc_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&pm_msg->rem_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); +} + +/* + * c4iw_form_reg_msg - Form a port mapper message with dev info + */ +static void c4iw_form_reg_msg(struct c4iw_dev *dev, + struct iwpm_dev_data *pm_msg) +{ + memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE); + memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name, + IWPM_IFNAME_SIZE); +} + +static void c4iw_record_pm_msg(struct c4iw_ep *ep, + struct iwpm_sa_data *pm_msg) +{ + memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr, + sizeof(ep->com.mapped_local_addr)); + memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr, + sizeof(ep->com.mapped_remote_addr)); +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -546,10 +584,14 @@ static int send_connect(struct c4iw_ep *ep) int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? sizeof(struct cpl_act_open_req6) : sizeof(struct cpl_t5_act_open_req6); - struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr; - struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr; - struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr; - struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; + struct sockaddr_in *la = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + struct sockaddr_in *ra = (struct sockaddr_in *) + &ep->com.mapped_remote_addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) + &ep->com.mapped_remote_addr; wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? roundup(sizev4, 16) : @@ -1627,10 +1669,10 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req->le.filter = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); - sin = (struct sockaddr_in *)&ep->com.local_addr; + sin = (struct sockaddr_in *)&ep->com.mapped_local_addr; req->le.lport = sin->sin_port; req->le.u.ipv4.lip = sin->sin_addr.s_addr; - sin = (struct sockaddr_in *)&ep->com.remote_addr; + sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr; req->le.pport = sin->sin_port; req->le.u.ipv4.pip = sin->sin_addr.s_addr; req->tcb.t_state_to_astid = @@ -1870,10 +1912,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct sockaddr_in6 *ra6; ep = lookup_atid(t, atid); - la = (struct sockaddr_in *)&ep->com.local_addr; - ra = (struct sockaddr_in *)&ep->com.remote_addr; - la6 = (struct sockaddr_in6 *)&ep->com.local_addr; - ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; + la = (struct sockaddr_in *)&ep->com.mapped_local_addr; + ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr; PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); @@ -2730,13 +2772,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep; int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; - struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr; - struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) - &cm_id->remote_addr; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + struct sockaddr_in6 *laddr6; + struct sockaddr_in6 *raddr6; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; __u8 *ra; int iptype; + int iwpm_err = 0; if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { @@ -2767,7 +2811,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->com.qp) { PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); err = -EINVAL; - goto fail2; + goto fail1; } ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, @@ -2780,10 +2824,50 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->atid == -1) { printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; - goto fail2; + goto fail1; } insert_handle(dev, &dev->atid_idr, ep, ep->atid); + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); + memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + sizeof(ep->com.remote_addr)); + + /* No port mapper available, go with the specified peer information */ + memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, + sizeof(ep->com.mapped_local_addr)); + memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr, + sizeof(ep->com.mapped_remote_addr)); + + c4iw_form_reg_msg(dev, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); + if (iwpm_err) { + PDBG("%s: Port Mapper reg pid fail (err = %d).\n", + __func__, iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + c4iw_form_pm_msg(ep, &pm_msg); + iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW); + if (iwpm_err) + PDBG("%s: Port Mapper query fail (err = %d).\n", + __func__, iwpm_err); + else + c4iw_record_pm_msg(ep, &pm_msg); + } + if (iwpm_create_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { + iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); + err = -ENOMEM; + goto fail1; + } + print_addr(&ep->com, __func__, "add_query/create_mapinfo"); + set_bit(RELEASE_MAPINFO, &ep->com.flags); + + laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr; + raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; + if (cm_id->remote_addr.ss_family == AF_INET) { iptype = 4; ra = (__u8 *)&raddr->sin_addr; @@ -2794,7 +2878,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail2; + goto fail1; } /* find a route */ @@ -2814,7 +2898,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail2; + goto fail1; } /* find a route */ @@ -2830,13 +2914,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; - goto fail3; + goto fail2; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; + goto fail3; } PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", @@ -2845,10 +2929,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) state_set(&ep->com, CONNECTING); ep->tos = 0; - memcpy(&ep->com.local_addr, &cm_id->local_addr, - sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->remote_addr, - sizeof(ep->com.remote_addr)); /* send connect request to rnic */ err = send_connect(ep); @@ -2856,12 +2936,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto out; cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); fail3: + dst_release(ep->dst); +fail2: remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: +fail1: cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); out: @@ -2871,7 +2951,8 @@ out: static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) { int err; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; c4iw_init_wr_wait(&ep->com.wr_wait); err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], @@ -2892,7 +2973,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) { int err; - struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr; + struct sockaddr_in *sin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; if (dev->rdev.lldi.enable_fw_ofld_conn) { do { @@ -2927,6 +3009,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int iwpm_err = 0; might_sleep(); @@ -2961,6 +3046,37 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto fail2; } insert_handle(dev, &dev->stid_idr, ep, ep->stid); + + /* No port mapper available, go with the specified info */ + memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, + sizeof(ep->com.mapped_local_addr)); + + c4iw_form_reg_msg(dev, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); + if (iwpm_err) { + PDBG("%s: Port Mapper reg pid fail (err = %d).\n", + __func__, iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + memcpy(&pm_msg.loc_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW); + if (iwpm_err) + PDBG("%s: Port Mapper query fail (err = %d).\n", + __func__, iwpm_err); + else + memcpy(&ep->com.mapped_local_addr, + &pm_msg.mapped_loc_addr, + sizeof(ep->com.mapped_local_addr)); + } + if (iwpm_create_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { + err = -ENOMEM; + goto fail3; + } + print_addr(&ep->com, __func__, "add_mapping/create_mapinfo"); + + set_bit(RELEASE_MAPINFO, &ep->com.flags); state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); @@ -2970,6 +3086,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = ep; goto out; } + +fail3: cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index f4fa50a609e2..bd749e75cec4 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -77,6 +77,16 @@ struct c4iw_debugfs_data { int pos; }; +/* registered cxgb4 netlink callbacks */ +static struct ibnl_client_cbs c4iw_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static int count_idrs(int id, void *p, void *data) { int *countp = data; @@ -113,35 +123,49 @@ static int dump_qp(int id, void *p, void *data) &qp->ep->com.local_addr; struct sockaddr_in *rsin = (struct sockaddr_in *) &qp->ep->com.remote_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &qp->ep->com.mapped_local_addr; + struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) + &qp->ep->com.mapped_remote_addr; cc = snprintf(qpd->buf + qpd->pos, space, "rc qp sq id %u rq id %u state %u " "onchip %u ep tid %u state %u " - "%pI4:%u->%pI4:%u\n", + "%pI4:%u/%u->%pI4:%u/%u\n", qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state, qp->wq.sq.flags & T4_SQ_ONCHIP, qp->ep->hwtid, (int)qp->ep->com.state, &lsin->sin_addr, ntohs(lsin->sin_port), - &rsin->sin_addr, ntohs(rsin->sin_port)); + ntohs(mapped_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(mapped_rsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) &qp->ep->com.local_addr; struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) &qp->ep->com.remote_addr; + struct sockaddr_in6 *mapped_lsin6 = + (struct sockaddr_in6 *) + &qp->ep->com.mapped_local_addr; + struct sockaddr_in6 *mapped_rsin6 = + (struct sockaddr_in6 *) + &qp->ep->com.mapped_remote_addr; cc = snprintf(qpd->buf + qpd->pos, space, "rc qp sq id %u rq id %u state %u " "onchip %u ep tid %u state %u " - "%pI6:%u->%pI6:%u\n", + "%pI6:%u/%u->%pI6:%u/%u\n", qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state, qp->wq.sq.flags & T4_SQ_ONCHIP, qp->ep->hwtid, (int)qp->ep->com.state, &lsin6->sin6_addr, ntohs(lsin6->sin6_port), + ntohs(mapped_lsin6->sin6_port), &rsin6->sin6_addr, - ntohs(rsin6->sin6_port)); + ntohs(rsin6->sin6_port), + ntohs(mapped_rsin6->sin6_port)); } } else cc = snprintf(qpd->buf + qpd->pos, space, @@ -386,31 +410,43 @@ static int dump_ep(int id, void *p, void *data) &ep->com.local_addr; struct sockaddr_in *rsin = (struct sockaddr_in *) &ep->com.remote_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) + &ep->com.mapped_remote_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " - "%pI4:%d <-> %pI4:%d\n", + "%pI4:%d/%d <-> %pI4:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, &lsin->sin_addr, ntohs(lsin->sin_port), - &rsin->sin_addr, ntohs(rsin->sin_port)); + ntohs(mapped_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(mapped_rsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) &ep->com.local_addr; struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) &ep->com.remote_addr; + struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) + &ep->com.mapped_remote_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " - "%pI6:%d <-> %pI6:%d\n", + "%pI6:%d/%d <-> %pI6:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, &lsin6->sin6_addr, ntohs(lsin6->sin6_port), - &rsin6->sin6_addr, ntohs(rsin6->sin6_port)); + ntohs(mapped_lsin6->sin6_port), + &rsin6->sin6_addr, ntohs(rsin6->sin6_port), + ntohs(mapped_rsin6->sin6_port)); } if (cc < space) epd->pos += cc; @@ -431,23 +467,29 @@ static int dump_listen_ep(int id, void *p, void *data) if (ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) &ep->com.local_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p state %d flags 0x%lx stid %d " - "backlog %d %pI4:%d\n", + "backlog %d %pI4:%d/%d\n", ep, ep->com.cm_id, (int)ep->com.state, ep->com.flags, ep->stid, ep->backlog, - &lsin->sin_addr, ntohs(lsin->sin_port)); + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(mapped_lsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) &ep->com.local_addr; + struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p state %d flags 0x%lx stid %d " - "backlog %d %pI6:%d\n", + "backlog %d %pI6:%d/%d\n", ep, ep->com.cm_id, (int)ep->com.state, ep->com.flags, ep->stid, ep->backlog, - &lsin6->sin6_addr, ntohs(lsin6->sin6_port)); + &lsin6->sin6_addr, ntohs(lsin6->sin6_port), + ntohs(mapped_lsin6->sin6_port)); } if (cc < space) epd->pos += cc; @@ -687,6 +729,7 @@ static void c4iw_dealloc(struct uld_ctx *ctx) if (ctx->dev->rdev.oc_mw_kva) iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); + iwpm_exit(RDMA_NL_C4IW); ctx->dev = NULL; } @@ -780,6 +823,14 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) c4iw_debugfs_root); setup_debugfs(devp); } + + ret = iwpm_init(RDMA_NL_C4IW); + if (ret) { + pr_err("port mapper initialization failed with %d\n", ret); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(ret); + } + return devp; } @@ -1274,6 +1325,11 @@ static int __init c4iw_init_module(void) printk(KERN_WARNING MOD "could not create debugfs entry, continuing\n"); + if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS, + c4iw_nl_cb_table)) + pr_err("%s[%u]: Failed to add netlink callback\n" + , __func__, __LINE__); + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1291,6 +1347,7 @@ static void __exit c4iw_exit_module(void) } mutex_unlock(&dev_mutex); cxgb4_unregister_uld(CXGB4_ULD_RDMA); + ibnl_remove_client(RDMA_NL_C4IW); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7474b490760a..6f533fbcc4b3 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -52,6 +52,8 @@ #include #include +#include +#include #include "cxgb4.h" #include "cxgb4_uld.h" @@ -728,6 +730,7 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, TIMEOUT = 4, QP_REFERENCED = 5, + RELEASE_MAPINFO = 6, }; enum c4iw_ep_history { @@ -764,6 +767,8 @@ struct c4iw_ep_common { struct mutex mutex; struct sockaddr_storage local_addr; struct sockaddr_storage remote_addr; + struct sockaddr_storage mapped_local_addr; + struct sockaddr_storage mapped_remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; @@ -807,6 +812,45 @@ struct c4iw_ep { unsigned int retry_count; }; +static inline void print_addr(struct c4iw_ep_common *epc, const char *func, + const char *msg) +{ + +#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr)) +#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port) +#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr)) +#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port) + + if (c4iw_debug) { + switch (epc->local_addr.ss_family) { + case AF_INET: + PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n", + func, msg, SINA(&epc->local_addr), + SINP(&epc->local_addr), + SINP(&epc->mapped_local_addr), + SINA(&epc->remote_addr), + SINP(&epc->remote_addr), + SINP(&epc->mapped_remote_addr)); + break; + case AF_INET6: + PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n", + func, msg, SIN6A(&epc->local_addr), + SIN6P(&epc->local_addr), + SIN6P(&epc->mapped_local_addr), + SIN6A(&epc->remote_addr), + SIN6P(&epc->remote_addr), + SIN6P(&epc->mapped_remote_addr)); + break; + default: + break; + } + } +#undef SINA +#undef SINP +#undef SIN6A +#undef SIN6P +} + static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; -- cgit v1.2.3