diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-06 16:43:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-06 16:43:36 -0700 |
commit | d7806bbd22cabc3e3b0a985cfcffa29cf156bb30 (patch) | |
tree | ef24f40c658c2f015b7f96f429e47dd16ab6e5b4 /drivers/infiniband/hw | |
parent | d6efb3ac3e6c19ab722b28bdb9252bae0b9676b6 (diff) | |
parent | 23fcc7dee2c6aba1060558683988263851e74bab (diff) | |
download | linux-d7806bbd22cabc3e3b0a985cfcffa29cf156bb30.tar.bz2 |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A quiet cycle after the larger 5.8 effort. Substantially cleanup and
driver work with a few smaller features this time.
- Driver updates for hfi1, rxe, mlx5, hns, qedr, usnic, bnxt_re
- Removal of dead or redundant code across the drivers
- RAW resource tracker dumps to include a device specific data blob
for device objects to aide device debugging
- Further advance the IOCTL interface, remove the ability to turn it
off. Add QUERY_CONTEXT, QUERY_MR, and QUERY_PD commands
- Remove stubs related to devices with no pkey table
- A shared CQ scheme to allow multiple ULPs to share the CQ rings of
a device to give higher performance
- Several more static checker, syzkaller and rare crashers fixed"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (121 commits)
RDMA/mlx5: Fix flow destination setting for RDMA TX flow table
RDMA/rxe: Remove pkey table
RDMA/umem: Add a schedule point in ib_umem_get()
RDMA/hns: Fix the unneeded process when getting a general type of CQE error
RDMA/hns: Fix error during modify qp RTS2RTS
RDMA/hns: Delete unnecessary memset when allocating VF resource
RDMA/hns: Remove redundant parameters in set_rc_wqe()
RDMA/hns: Remove support for HIP08_A
RDMA/hns: Refactor hns_roce_v2_set_hem()
RDMA/hns: Remove redundant hardware opcode definitions
RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP
RDMA/include: Replace license text with SPDX tags
RDMA/rtrs: remove WQ_MEM_RECLAIM for rtrs_wq
RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting
RDMA/cma: Execute rdma_cm destruction from a handler properly
RDMA/cma: Remove unneeded locking for req paths
RDMA/cma: Using the standard locking pattern when delivering the removal event
RDMA/cma: Simplify DEVICE_REMOVAL for internal_id
RDMA/efa: Add EFA 0xefa1 PCI ID
RDMA/efa: User/kernel compatibility handshake mechanism
...
Diffstat (limited to 'drivers/infiniband/hw')
72 files changed, 5214 insertions, 4810 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8b6ad5cddfce..3f18efc0c297 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -842,16 +842,79 @@ static u8 __from_ib_qp_type(enum ib_qp_type type) } } +static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp, + int rsge, int max) +{ + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + rsge = max; + return bnxt_re_get_rwqe_size(rsge); +} + +static u16 bnxt_re_get_wqe_size(int ilsize, int nsge) +{ + u16 wqe_size, calc_ils; + + wqe_size = bnxt_re_get_swqe_size(nsge); + if (ilsize) { + calc_ils = sizeof(struct sq_send_hdr) + ilsize; + wqe_size = max_t(u16, calc_ils, wqe_size); + wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr)); + } + return wqe_size; +} + +static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *sq; + int align, ilsize; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + sq = &qplqp->sq; + dev_attr = &rdev->dev_attr; + + align = sizeof(struct sq_send_hdr); + ilsize = ALIGN(init_attr->cap.max_inline_data, align); + + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) + return -EINVAL; + /* For gen p4 and gen p5 backward compatibility mode + * wqe size is fixed to 128 bytes + */ + if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && + qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); + + if (init_attr->cap.max_inline_data) { + qplqp->max_inline_data = sq->wqe_size - + sizeof(struct sq_send_hdr); + init_attr->cap.max_inline_data = qplqp->max_inline_data; + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->max_sge = qplqp->max_inline_data / + sizeof(struct sq_sge); + } + + return 0; +} + static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_qp *qp, struct ib_udata *udata) { + struct bnxt_qplib_qp *qplib_qp; + struct bnxt_re_ucontext *cntx; struct bnxt_re_qp_req ureq; - struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp; - struct ib_umem *umem; int bytes = 0, psn_sz; - struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context( - udata, struct bnxt_re_ucontext, ib_uctx); + struct ib_umem *umem; + int psn_nume; + qplib_qp = &qp->qplib_qp; + cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, + ib_uctx); if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; @@ -859,10 +922,15 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? - sizeof(struct sq_psn_search_ext) : - sizeof(struct sq_psn_search); - bytes += (qplib_qp->sq.max_wqe * psn_sz); + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + qplib_qp->sq.max_wqe : + ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / + sizeof(struct bnxt_qplib_sge)); + bytes += (psn_nume * psn_sz); } + bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -975,7 +1043,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ - qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); + qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ @@ -986,7 +1054,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; - qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ @@ -1041,19 +1109,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, qplqp->srq = &srq->qplib_srq; rq->max_wqe = 0; } else { - rq->wqe_size = bnxt_re_get_rwqe_size(); + rq->max_sge = init_attr->cap.max_recv_sge; + if (rq->max_sge > dev_attr->max_qp_sges) + rq->max_sge = dev_attr->max_qp_sges; + init_attr->cap.max_recv_sge = rq->max_sge; + rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge, + dev_attr->max_qp_sges); /* Allocate 1 more than what's provided so posting max doesn't * mean empty. */ entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; - rq->max_sge = init_attr->cap.max_recv_sge; - if (rq->max_sge > dev_attr->max_qp_sges) - rq->max_sge = dev_attr->max_qp_sges; + rq->q_full_delta = 0; + rq->sg_info.pgsize = PAGE_SIZE; + rq->sg_info.pgshft = PAGE_SHIFT; } - rq->sg_info.pgsize = PAGE_SIZE; - rq->sg_info.pgshft = PAGE_SHIFT; return 0; } @@ -1068,41 +1138,48 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - qplqp->rq.max_sge = dev_attr->max_qp_sges; - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; - qplqp->rq.max_sge = 6; + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + qplqp->rq.max_sge = dev_attr->max_qp_sges; + qplqp->rq.max_sge = 6; + } } -static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *sq; int entries; + int diff; + int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; dev_attr = &rdev->dev_attr; - sq->wqe_size = bnxt_re_get_swqe_size(); sq->max_sge = init_attr->cap.max_send_sge; - if (sq->max_sge > dev_attr->max_qp_sges) + if (sq->max_sge > dev_attr->max_qp_sges) { sq->max_sge = dev_attr->max_qp_sges; - /* - * Change the SQ depth if user has requested minimum using - * configfs. Only supported for kernel consumers - */ + init_attr->cap.max_send_sge = sq->max_sge; + } + + rc = bnxt_re_setup_swqe_size(qp, init_attr); + if (rc) + return rc; + entries = init_attr->cap.max_send_wr; /* Allocate 128 + 1 more than what's provided */ - entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? + 0 : BNXT_QPLIB_RESERVED_QP_WRS; + entries = roundup_pow_of_two(entries + diff + 1); + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); + sq->q_full_delta = diff + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. But allowing this to avoid @@ -1111,6 +1188,8 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, qplqp->sq.q_full_delta -= 1; qplqp->sq.sg_info.pgsize = PAGE_SIZE; qplqp->sq.sg_info.pgshft = PAGE_SHIFT; + + return 0; } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, @@ -1125,13 +1204,16 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - qplqp->sq.q_full_delta = qplqp->sq.max_wqe - - init_attr->cap.max_send_wr; - qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) - qplqp->sq.max_sge = dev_attr->max_qp_sges; + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + qplqp->sq.max_wqe = min_t(u32, entries, + dev_attr->max_qp_wqes + 1); + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - + init_attr->cap.max_send_wr; + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) + qplqp->sq.max_sge = dev_attr->max_qp_sges; + } } static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, @@ -1183,6 +1265,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, goto out; } qplqp->type = (u8)qptype; + qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode; if (init_attr->qp_type == IB_QPT_RC) { qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom; @@ -1226,7 +1309,9 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ - bnxt_re_init_sq_attr(qp, init_attr, udata); + rc = bnxt_re_init_sq_attr(qp, init_attr, udata); + if (rc) + goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_sq_attr(qp, init_attr); @@ -1574,8 +1659,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.max_wqe = entries; - srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; + srq->qplib_srq.wqe_size = + bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -3569,7 +3655,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, } struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index e5fbbeba6d28..1daeb30e06fd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -136,14 +136,14 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; -static inline u16 bnxt_re_get_swqe_size(void) +static inline u16 bnxt_re_get_swqe_size(int nsge) { - return sizeof(struct sq_send); + return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); } -static inline u16 bnxt_re_get_rwqe_size(void) +static inline u16 bnxt_re_get_rwqe_size(int nsge) { - return sizeof(struct rq_wqe); + return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } int bnxt_re_query_device(struct ib_device *ibdev, @@ -201,7 +201,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b12fbc857f94..dad0df8a2467 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -82,6 +82,15 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev); static void bnxt_re_dealloc_driver(struct ib_device *ib_dev); static void bnxt_re_stop_irq(void *handle); +static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) +{ + struct bnxt_qplib_chip_ctx *cctx; + + cctx = rdev->chip_ctx; + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + mode : BNXT_QPLIB_WQE_MODE_STATIC; +} + static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; @@ -97,7 +106,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) kfree(chip_ctx); } -static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) +static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; @@ -117,6 +126,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; + bnxt_re_set_drv_mode(rdev, wqe_mode); return 0; } @@ -1386,7 +1396,7 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_creq_ctx *creq; struct bnxt_re_ring_attr rattr; @@ -1406,7 +1416,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - rc = bnxt_re_setup_chip_ctx(rdev); + rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); if (rc) { ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); return -EINVAL; @@ -1585,7 +1595,7 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev) } static int bnxt_re_add_device(struct bnxt_re_dev **rdev, - struct net_device *netdev) + struct net_device *netdev, u8 wqe_mode) { int rc; @@ -1599,7 +1609,7 @@ static int bnxt_re_add_device(struct bnxt_re_dev **rdev, } pci_dev_get((*rdev)->en_dev->pdev); - rc = bnxt_re_dev_init(*rdev); + rc = bnxt_re_dev_init(*rdev, wqe_mode); if (rc) { pci_dev_put((*rdev)->en_dev->pdev); bnxt_re_dev_unreg(*rdev); @@ -1711,7 +1721,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, case NETDEV_REGISTER: if (rdev) break; - rc = bnxt_re_add_device(&rdev, real_dev); + rc = bnxt_re_add_device(&rdev, real_dev, + BNXT_QPLIB_WQE_MODE_STATIC); if (!rc) sch_work = true; release = false; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c5e29577cd43..117b42349a28 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -178,11 +178,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, if (qp->rq_hdr_buf) dma_free_coherent(&res->pdev->dev, - rq->hwq.max_elements * qp->rq_hdr_buf_size, + rq->max_wqe * qp->rq_hdr_buf_size, qp->rq_hdr_buf, qp->rq_hdr_buf_map); if (qp->sq_hdr_buf) dma_free_coherent(&res->pdev->dev, - sq->hwq.max_elements * qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, qp->sq_hdr_buf, qp->sq_hdr_buf_map); qp->rq_hdr_buf = NULL; qp->sq_hdr_buf = NULL; @@ -199,10 +199,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_q *sq = &qp->sq; int rc = 0; - if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { + if (qp->sq_hdr_buf_size && sq->max_wqe) { qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - sq->hwq.max_elements * - qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, &qp->sq_hdr_buf_map, GFP_KERNEL); if (!qp->sq_hdr_buf) { rc = -ENOMEM; @@ -212,9 +211,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, } } - if (qp->rq_hdr_buf_size && rq->hwq.max_elements) { + if (qp->rq_hdr_buf_size && rq->max_wqe) { qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - rq->hwq.max_elements * + rq->max_wqe * qp->rq_hdr_buf_size, &qp->rq_hdr_buf_map, GFP_KERNEL); @@ -661,6 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.hwq = &srq->hwq; srq->dbinfo.xid = srq->id; srq->dbinfo.db = srq->dpi->dbr; + srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); @@ -784,6 +784,28 @@ done: } /* QP */ + +static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) +{ + int rc = 0; + int indx; + + que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL); + if (!que->swq) { + rc = -ENOMEM; + goto out; + } + + que->swq_start = 0; + que->swq_last = que->max_wqe - 1; + for (indx = 0; indx < que->max_wqe; indx++) + que->swq[indx].next_idx = indx + 1; + que->swq[que->swq_last].next_idx = 0; /* Make it circular */ + que->swq_last = 0; +out: + return rc; +} + int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_hwq_attr hwq_attr = {}; @@ -808,71 +830,63 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.depth = sq->max_wqe; - hwq_attr.stride = sq->wqe_size; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) goto exit; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } + + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); req.sq_pg_size_sq_lvl = pg_sz_lvl; + req.sq_fwo_sq_sge = + cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << + CMDQ_CREATE_QP1_SQ_SGE_SFT); + req.scq_cid = cpu_to_le32(qp->scq->id); - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = qp->rq.max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) goto fail_rq; - } + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); req.rq_pg_size_rq_lvl = pg_sz_lvl; - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); + req.rq_fwo_rq_sge = + cpu_to_le16((rq->max_sge & + CMDQ_CREATE_QP1_RQ_SGE_MASK) << + CMDQ_CREATE_QP1_RQ_SGE_SFT); } + req.rcq_cid = cpu_to_le32(qp->rcq->id); /* Header buffer - allow hdr_buf pass in */ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); if (rc) { rc = -ENOMEM; - goto fail; + goto rq_rwq; } qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - - req.sq_fwo_sq_sge = - cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << - CMDQ_CREATE_QP1_SQ_SGE_SFT); - req.rq_fwo_rq_sge = - cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) << - CMDQ_CREATE_QP1_RQ_SGE_SFT); - req.pd_id = cpu_to_le32(qp->pd->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, @@ -886,10 +900,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -898,12 +914,14 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) fail: bnxt_qplib_free_qp_hdr_buf(res, qp); +rq_rwq: + kfree(rq->swq); fail_rq: bnxt_qplib_free_hwq(res, &rq->hwq); - kfree(rq->swq); +sq_swq: + kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); - kfree(sq->swq); exit: return rc; } @@ -912,26 +930,18 @@ static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) { struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_q *sq; - u64 fpsne, psne, psn_pg; - u16 indx_pad = 0, indx; - u16 pg_num, pg_indx; - u64 *page; + u64 fpsne, psn_pg; + u16 indx_pad = 0; sq = &qp->sq; hwq = &sq->hwq; - - fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); if (!IS_ALIGNED(fpsne, PAGE_SIZE)) indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; - page = (u64 *)psn_pg; - for (indx = 0; indx < hwq->max_elements; indx++) { - pg_num = (indx + indx_pad) / (PAGE_SIZE / size); - pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); - psne = page[pg_num] + pg_indx * size; - sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; - sq->swq[indx].psn_search = (struct sq_psn_search *)psne; - } + hwq->pad_pgofft = indx_pad; + hwq->pad_pg = (u64 *)psn_pg; + hwq->pad_stride = size; } int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -944,12 +954,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct creq_create_qp_resp resp; int rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; - u16 cmd_flags = 0, max_ssge; struct bnxt_qplib_pbl *pbl; struct cmdq_create_qp req; + u16 cmd_flags = 0; u32 qp_flags = 0; u8 pg_sz_lvl; - u16 max_rsge; + u16 nsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -967,97 +977,78 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.stride = sq->wqe_size; - hwq_attr.depth = sq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; - hwq_attr.aux_depth = hwq_attr.depth; + hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) goto exit; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } if (psn_sz) bnxt_qplib_init_psn_ptr(qp, psn_sz); + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); req.sq_pg_size_sq_lvl = pg_sz_lvl; - - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); + req.sq_fwo_sq_sge = + cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) << + CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); + req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ - if (rq->max_wqe) { + if (!qp->srq) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = rq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) goto fail_rq; - } + + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); req.rq_pg_size_rq_lvl = pg_sz_lvl; + nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + 6 : rq->max_sge; + req.rq_fwo_rq_sge = + cpu_to_le16(((nsge & + CMDQ_CREATE_QP_RQ_SGE_MASK) << + CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); } else { /* SRQ */ - if (qp->srq) { - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; - req.srq_cid = cpu_to_le32(qp->srq->id); - } + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; + req.srq_cid = cpu_to_le32(qp->srq->id); } - - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); + req.rcq_cid = cpu_to_le32(qp->rcq->id); qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; if (qp->sig_type) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - qp->sq_hdr_buf = NULL; - qp->rq_hdr_buf = NULL; - - rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); - if (rc) - goto fail_rq; - - /* CTRL-22434: Irrespective of the requested SGE count on the SQ - * always create the QP with max send sges possible if the requested - * inline size is greater than 0. - */ - max_ssge = qp->max_inline_data ? 6 : sq->max_sge; - req.sq_fwo_sq_sge = cpu_to_le16( - ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK) - << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); - max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge; - req.rq_fwo_rq_sge = cpu_to_le16( - ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK) - << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); /* ORRQ and IRRQ */ if (psn_sz) { xrrq = &qp->orrq; @@ -1078,7 +1069,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_CTX; rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) - goto fail_buf_free; + goto rq_swq; pbl = &xrrq->pbl[PBL_LVL_0]; req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]); @@ -1113,30 +1104,29 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; - fail: - if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); fail_orrq: - if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res, &qp->orrq); -fail_buf_free: - bnxt_qplib_free_qp_hdr_buf(res, qp); + bnxt_qplib_free_hwq(res, &qp->orrq); +rq_swq: + kfree(rq->swq); fail_rq: bnxt_qplib_free_hwq(res, &rq->hwq); - kfree(rq->swq); +sq_swq: + kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); - kfree(sq->swq); exit: return rc; } @@ -1512,7 +1502,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->sq_hdr_buf) { - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + sw_prod = sq->swq_start; sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map + sw_prod * qp->sq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1526,7 +1516,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - return HWQ_CMP(rq->hwq.prod, &rq->hwq); + return rq->swq_start; } dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index) @@ -1543,7 +1533,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->rq_hdr_buf) { - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + sw_prod = rq->swq_start; sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map + sw_prod * qp->rq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1562,6 +1552,8 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, u32 flg_npsn; u32 op_spsn; + if (!swq->psn_search) + return; psns = swq->psn_search; psns_ext = swq->psn_ext; @@ -1575,12 +1567,122 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); } else { psns->opcode_start_psn = cpu_to_le32(op_spsn); psns->flags_next_psn = cpu_to_le32(flg_npsn); } } +static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *idx) +{ + struct bnxt_qplib_hwq *hwq; + int len, t_len, offt; + bool pull_dst = true; + void *il_dst = NULL; + void *il_src = NULL; + int t_cplen, cplen; + int indx; + + hwq = &qp->sq.hwq; + t_len = 0; + for (indx = 0; indx < wqe->num_sge; indx++) { + len = wqe->sg_list[indx].size; + il_src = (void *)wqe->sg_list[indx].addr; + t_len += len; + if (t_len > qp->max_inline_data) + goto bad; + while (len) { + if (pull_dst) { + pull_dst = false; + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); + (*idx)++; + t_cplen = 0; + offt = 0; + } + cplen = min_t(int, len, sizeof(struct sq_sge)); + cplen = min_t(int, cplen, + (sizeof(struct sq_sge) - offt)); + memcpy(il_dst, il_src, cplen); + t_cplen += cplen; + il_src += cplen; + il_dst += cplen; + offt += cplen; + len -= cplen; + if (t_cplen == sizeof(struct sq_sge)) + pull_dst = true; + } + } + + return t_len; +bad: + return -ENOMEM; +} + +static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_sge *ssge, + u16 nsge, u16 *idx) +{ + struct sq_sge *dsge; + int indx, len = 0; + + for (indx = 0; indx < nsge; indx++, (*idx)++) { + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); + dsge->l_key = cpu_to_le32(ssge[indx].lkey); + dsge->size = cpu_to_le32(ssge[indx].size); + len += ssge[indx].size; + } + + return len; +} + +static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *wqe_sz, u16 *qdf, u8 mode) +{ + u32 ilsize, bytes; + u16 nsge; + u16 slot; + + nsge = wqe->num_sge; + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */ + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); + bytes += sizeof(struct sq_send_hdr); + } + + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); + slot = bytes >> 4; + *wqe_sz = slot; + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) + slot = 8; + return slot; +} + +static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, + struct bnxt_qplib_swq *swq) +{ + struct bnxt_qplib_hwq *hwq; + u32 pg_num, pg_indx; + void *buff; + u32 tail; + + hwq = &sq->hwq; + if (!hwq->pad_pg) + return; + tail = swq->slot_idx / sq->dbinfo.max_slot; + pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); + pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); + buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); + swq->psn_ext = buff; + swq->psn_search = buff; +} + void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; @@ -1594,88 +1696,84 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_nq_work *nq_work = NULL; int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; - struct sq_send *hw_sq_send_hdr; + struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - u8 wqe_size16; + u16 wqe_sz, qdf = 0; + void *base_hdr; + void *ext_hdr; __le32 temp32; - u32 sw_prod; + u32 wqe_idx; + u32 slots; + u16 idx; - if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - sch_handler = true; - dev_dbg(&sq->hwq.pdev->dev, - "%s Error QP. Scheduling for poll_cq\n", - __func__); - goto queue_err; - } + hwq = &sq->hwq; + if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && + qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); + rc = -EINVAL; + goto done; } - if (bnxt_qplib_queue_full(sq)) { - dev_err(&sq->hwq.pdev->dev, + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); + if (bnxt_qplib_queue_full(sq, slots + qdf)) { + dev_err(&hwq->pdev->dev, "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", - sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, - sq->q_full_delta); + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); rc = -ENOMEM; goto done; } - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; + + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); + bnxt_qplib_pull_psn_buff(sq, swq); + + idx = 0; + swq->slot_idx = hwq->prod; + swq->slots = slots; swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; + swq->start_psn = sq->psn & BTH_PSN_MASK; if (qp->sig_type) swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - - hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); - memset(hw_sq_send_hdr, 0, sq->wqe_size); - if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { - /* Copy the inline data */ - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_warn(&sq->hwq.pdev->dev, - "Inline data length > 96 detected\n"); - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; - } else { - data_len = wqe->inline_len; - } - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); - wqe_size16 = (data_len + 15) >> 4; - } else { - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); - data_len += wqe->sg_list[i].size; - } - /* Each SGE entry = 1 WQE size16 */ - wqe_size16 = wqe->num_sge; - /* HW requires wqe size has room for atleast one SGE even if - * none was supplied by ULP - */ - if (!wqe->num_sge) - wqe_size16++; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; } + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) + /* Copy the inline data */ + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); + else + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, + &idx); + if (data_len < 0) + goto queue_err; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; /* Assemble info for Raw Ethertype QPs */ - struct sq_send_raweth_qp1 *sqe = - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); sqe->length = cpu_to_le32(data_len); - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); @@ -1685,27 +1783,24 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: { - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; + struct sq_send_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); - sqe->inv_key_or_imm_data = cpu_to_le32( - wqe->send.inv_key); + sqe->wqe_size = wqe_sz; + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); if (qp->type == CMDQ_CREATE_QP_TYPE_UD || qp->type == CMDQ_CREATE_QP_TYPE_GSI) { sqe->q_key = cpu_to_le32(wqe->send.q_key); - sqe->dst_qp = cpu_to_le32( - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); sqe->length = cpu_to_le32(data_len); - sqe->avid = cpu_to_le32(wqe->send.avid & - SQ_SEND_AVID_MASK); sq->psn = (sq->psn + 1) & BTH_PSN_MASK; + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & + SQ_SEND_DST_QP_MASK); + ext_sqe->avid = cpu_to_le32(wqe->send.avid & + SQ_SEND_AVID_MASK); } else { sqe->length = cpu_to_le32(data_len); - sqe->dst_qp = 0; - sqe->avid = 0; if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1718,16 +1813,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: { - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; + struct sq_rdma_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); sqe->length = cpu_to_le32((u32)data_len); - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1738,14 +1833,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: { - struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; + struct sq_atomic_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1755,8 +1851,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: { - struct sq_localinvalidate *sqe = - (struct sq_localinvalidate *)hw_sq_send_hdr; + struct sq_localinvalidate *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1766,7 +1861,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: { - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; + struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr; + struct sq_fr_pmr_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1790,14 +1886,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, wqe->frmr.pbl_ptr[i] = cpu_to_le64( wqe->frmr.page_list[i] | PTU_PTE_VALID); - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); - sqe->va = cpu_to_le64(wqe->frmr.va); + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); + ext_sqe->va = cpu_to_le64(wqe->frmr.va); break; } case BNXT_QPLIB_SWQE_TYPE_BIND_MW: { - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; + struct sq_bind_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1806,9 +1903,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0); sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); sqe->l_key = cpu_to_le32(wqe->bind.r_key); - sqe->va = cpu_to_le64(wqe->bind.va); - temp32 = cpu_to_le32(wqe->bind.length); - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); + ext_sqe->va = cpu_to_le64(wqe->bind.va); + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); break; } default: @@ -1817,23 +1913,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } swq->next_psn = sq->psn & BTH_PSN_MASK; - if (qp->type == CMDQ_CREATE_QP_TYPE_RC) - bnxt_qplib_fill_psn_search(qp, wqe, swq); + bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: - if (sch_handler) { - /* Store the ULP info in the software structures */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; - swq->wr_id = wqe->wr_id; - swq->type = wqe->type; - swq->flags = wqe->flags; - if (qp->sig_type) - swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - } - sq->hwq.prod++; + bnxt_qplib_swq_mod_start(sq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); qp->wqe_cnt++; - done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); @@ -1843,7 +1927,7 @@ done: INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); queue_work(qp->scq->nq->cqn_wq, &nq_work->work); } else { - dev_err(&sq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: Failed to allocate SQ nq_work!\n"); rc = -ENOMEM; } @@ -1863,58 +1947,65 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, { struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; + struct rq_wqe_hdr *base_hdr; + struct rq_ext_hdr *ext_hdr; + struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - struct rq_wqe *rqe; - int i, rc = 0; - u32 sw_prod; + u16 wqe_sz, idx; + u32 wqe_idx; + int rc = 0; - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - sch_handler = true; - dev_dbg(&rq->hwq.pdev->dev, - "%s: Error QP. Scheduling for poll_cq\n", __func__); - goto queue_err; + hwq = &rq->hwq; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); + rc = -EINVAL; + goto done; } - if (bnxt_qplib_queue_full(rq)) { - dev_err(&rq->hwq.pdev->dev, + + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { + dev_err(&hwq->pdev->dev, "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; - rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); - memset(rqe, 0, rq->wqe_size); + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); + swq->wr_id = wqe->wr_id; + swq->slots = rq->dbinfo.max_slot; - /* Calculate wqe_size16 and data_len */ - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s: Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; } - rqe->wqe_type = wqe->type; - rqe->flags = wqe->flags; - rqe->wqe_size = wqe->num_sge + - ((offsetof(typeof(*rqe), data) + 15) >> 4); - /* HW requires wqe size has room for atleast one SGE even if none - * was supplied by ULP - */ - if (!wqe->num_sge) - rqe->wqe_size++; - - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ - rqe->wr_id[0] = cpu_to_le32(sw_prod); + idx = 0; + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + wqe_sz = (sizeof(struct rq_wqe_hdr) + + wqe->num_sge * sizeof(struct sq_sge)) >> 4; + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); + if (!wqe->num_sge) { + struct sq_sge *sge; + + sge = bnxt_qplib_get_prod_qe(hwq, idx++); + sge->size = 0; + wqe_sz++; + } + base_hdr->wqe_type = wqe->type; + base_hdr->flags = wqe->flags; + base_hdr->wqe_size = wqe_sz; + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); queue_err: - if (sch_handler) { - /* Store the ULP info in the software structures */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; - } - - rq->hwq.prod++; + bnxt_qplib_swq_mod_start(rq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); +done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); if (nq_work) { @@ -1923,12 +2014,12 @@ queue_err: INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); } else { - dev_err(&rq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: Failed to allocate RQ nq_work!\n"); rc = -ENOMEM; } } -done: + return rc; } @@ -2026,20 +2117,19 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { - u32 sw_prod, sw_cons; struct bnxt_qplib_cqe *cqe; + u32 start, last; int rc = 0; /* Now complete all outstanding SQEs with FLUSHED_ERR */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + start = sq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == sw_prod) { + last = sq->swq_last; + if (start == last) break; - } /* Skip the FENCE WQE completions */ - if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) { + if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) { bnxt_qplib_cancel_phantom_processing(qp); goto skip_compl; } @@ -2047,16 +2137,17 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; - cqe->wr_id = sq->swq[sw_cons].wr_id; + cqe->wr_id = sq->swq[last].wr_id; cqe->src_qp = qp->id; - cqe->type = sq->swq[sw_cons].type; + cqe->type = sq->swq[last].type; cqe++; (*budget)--; skip_compl: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots); + sq->swq_last = sq->swq[last].next_idx; } *pcqe = cqe; - if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod) + if (!(*budget) && sq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -2067,9 +2158,9 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { struct bnxt_qplib_cqe *cqe; - u32 sw_prod, sw_cons; - int rc = 0; + u32 start, last; int opcode = 0; + int rc = 0; switch (qp->type) { case CMDQ_CREATE_QP1_TYPE_GSI: @@ -2085,24 +2176,25 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, } /* Flush the rest of the RQ */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + start = rq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq); - if (sw_cons == sw_prod) + last = rq->swq_last; + if (last == start) break; memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = opcode; cqe->qp_handle = (unsigned long)qp; - cqe->wr_id = rq->swq[sw_cons].wr_id; + cqe->wr_id = rq->swq[last].wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots); + rq->swq_last = rq->swq[last].next_idx; } *pcqe = cqe; - if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod) + if (!*budget && rq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -2125,7 +2217,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 */ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, - u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) + u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) { u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; struct bnxt_qplib_q *sq = &qp->sq; @@ -2138,7 +2230,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, /* Normal mode */ /* Check for the psn_search marking before completing */ - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[swq_last]; if (swq->psn_search && le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) { /* Unmark */ @@ -2147,7 +2239,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, & ~0x80000000); dev_dbg(&cq->hwq.pdev->dev, "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); sq->condition = true; sq->send_phantom = true; @@ -2184,9 +2276,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, le64_to_cpu (peek_req_hwcqe->qp_handle)); peek_sq = &peek_qp->sq; - peek_sq_cons_idx = HWQ_CMP(le16_to_cpu( - peek_req_hwcqe->sq_cons_idx) - 1 - , &sq->hwq); + peek_sq_cons_idx = + ((le16_to_cpu( + peek_req_hwcqe->sq_cons_idx) + - 1) % sq->max_wqe); /* If the hwcqe's sq's wr_id matches */ if (peek_sq == sq && sq->swq[peek_sq_cons_idx].wr_id == @@ -2214,7 +2307,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, } dev_err(&cq->hwq.pdev->dev, "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); rc = -EINVAL; } out: @@ -2226,11 +2319,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget, u32 cq_cons, struct bnxt_qplib_qp **lib_qp) { - u32 sw_sq_cons, cqe_sq_cons; struct bnxt_qplib_swq *swq; struct bnxt_qplib_cqe *cqe; struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq; + u32 cqe_sq_cons; int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) @@ -2242,14 +2335,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } sq = &qp->sq; - cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); - if (cqe_sq_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n", - cqe_sq_cons, sq->hwq.max_elements); - return -EINVAL; - } - + cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); @@ -2261,12 +2347,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_sq_cons == cqe_sq_cons) + if (sq->swq_last == cqe_sq_cons) /* Done */ break; - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[sq->swq_last]; memset(cqe, 0, sizeof(*cqe)); cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; @@ -2280,12 +2365,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, * of the request being signaled or not, it must complete with * the hwcqe error status */ - if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons && + if (swq->next_idx == cqe_sq_cons && hwcqe->status != CQ_REQ_STATUS_OK) { cqe->status = hwcqe->status; dev_err(&cq->hwq.pdev->dev, "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", - sw_sq_cons, cqe->wr_id, cqe->status); + sq->swq_last, cqe->wr_id, cqe->status); cqe++; (*budget)--; bnxt_qplib_mark_qp_error(qp); @@ -2293,7 +2378,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_add_flush_qp(qp); } else { /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, cqe_sq_cons)) { *lib_qp = qp; goto out; @@ -2305,13 +2390,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } } skip: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots); + sq->swq_last = swq->next_idx; if (sq->single) break; } out: *pcqe = cqe; - if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) { + if (sq->swq_last != cqe_sq_cons) { /* Out of budget */ rc = -EAGAIN; goto done; @@ -2386,17 +2472,23 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (wr_id_idx != rq->swq_last) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2467,18 +2559,24 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2569,17 +2667,23 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2601,7 +2705,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq, *rq; struct bnxt_qplib_cqe *cqe; - u32 sw_cons = 0, cqe_cons; + u32 swq_last = 0, cqe_cons; int rc = 0; /* Check the Status */ @@ -2627,13 +2731,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx); if (cqe_cons == 0xFFFF) goto do_rq; - - if (cqe_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n", - cqe_cons, sq->hwq.max_elements); - goto do_rq; - } + cqe_cons %= sq->max_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, @@ -2647,24 +2745,25 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == cqe_cons) + swq_last = sq->swq_last; + if (swq_last == cqe_cons) break; - if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { + if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_OK; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; cqe->src_qp = qp->id; - cqe->wr_id = sq->swq[sw_cons].wr_id; - cqe->type = sq->swq[sw_cons].type; + cqe->wr_id = sq->swq[swq_last].wr_id; + cqe->type = sq->swq[swq_last].type; cqe++; (*budget)--; } - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots); + sq->swq_last = sq->swq[swq_last].next_idx; } *pcqe = cqe; - if (!(*budget) && sw_cons != cqe_cons) { + if (!(*budget) && swq_last != cqe_cons) { /* Out of budget */ rc = -EAGAIN; goto sq_done; @@ -2676,10 +2775,10 @@ do_rq: cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx); if (cqe_cons == 0xFFFF) { goto done; - } else if (cqe_cons > rq->hwq.max_elements) { + } else if (cqe_cons > rq->max_wqe - 1) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", - cqe_cons, rq->hwq.max_elements); + cqe_cons, rq->max_wqe); goto done; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 568ca390322c..f50784405e27 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -39,6 +39,51 @@ #ifndef __BNXT_QPLIB_FP_H__ #define __BNXT_QPLIB_FP_H__ +/* Few helper structures temporarily defined here + * should get rid of these when roce_hsi.h is updated + * in original code base + */ +struct sq_ud_ext_hdr { + __le32 dst_qp; + __le32 avid; + __le64 rsvd; +}; + +struct sq_raw_ext_hdr { + __le32 cfa_meta; + __le32 rsvd0; + __le64 rsvd1; +}; + +struct sq_rdma_ext_hdr { + __le64 remote_va; + __le32 remote_key; + __le32 rsvd; +}; + +struct sq_atomic_ext_hdr { + __le64 swap_data; + __le64 cmp_data; +}; + +struct sq_fr_pmr_ext_hdr { + __le64 pblptr; + __le64 va; +}; + +struct sq_bind_ext_hdr { + __le64 va; + __le32 length_lo; + __le32 length_hi; +}; + +struct rq_ext_hdr { + __le64 rsvd1; + __le64 rsvd2; +}; + +/* Helper structures end */ + struct bnxt_qplib_srq { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; @@ -74,6 +119,8 @@ struct bnxt_qplib_swq { u8 flags; u32 start_psn; u32 next_psn; + u32 slot_idx; + u8 slots; struct sq_psn_search *psn_search; struct sq_psn_search_ext *psn_ext; }; @@ -213,6 +260,8 @@ struct bnxt_qplib_q { u32 phantom_cqe_cnt; u32 next_cq_cons; bool flushed; + u32 swq_start; + u32 swq_last; }; struct bnxt_qplib_qp { @@ -224,9 +273,10 @@ struct bnxt_qplib_qp { u32 id; u8 type; u8 sig_type; - u32 modify_flags; + u8 wqe_mode; u8 state; u8 cur_qp_state; + u64 modify_flags; u32 max_inline_data; u32 mtu; u8 path_mtu; @@ -300,11 +350,18 @@ struct bnxt_qplib_qp { (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ !((raw_cons) & (cp_bit))) -static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q) +static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, + u8 slots) { - return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta), - &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons, - &qplib_q->hwq); + struct bnxt_qplib_hwq *hwq; + int avail; + + hwq = &que->hwq; + /* False full is possible, retrying post-send makes sense */ + avail = hwq->cons - hwq->prod; + if (hwq->cons <= hwq->prod) + avail += hwq->depth; + return avail <= slots; } struct bnxt_qplib_cqe { @@ -489,4 +546,64 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes); void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp); + +static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx) +{ + u32 idx; + + idx = que->swq_start; + if (swq_idx) + *swq_idx = idx; + return &que->swq[idx]; +} + +static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) +{ + que->swq_start = que->swq[idx].next_idx; +} + +static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) +{ + return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); +} + +static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + que->max_wqe : bnxt_qplib_get_depth(que); +} + +static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + sizeof(struct sq_send) / sizeof(struct sq_sge) : 1; +} + +static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size) +{ + return (wqe_size / sizeof(struct sq_sge)); +} + +static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes) +{ + /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128 + * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128. + * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512. + * when 8916 is disabled. + */ + return (delta * wqe_bytes) / sizeof(struct sq_sge); +} + +static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) +{ + u16 size = 0; + int indx; + + for (indx = 0; indx < wqe->num_sge; indx++) + size += wqe->sg_list[indx].size; + if (size > max) + size = max; + + return size; +} #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c29cbd3a2d7b..9da470d1e4a3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -41,6 +41,28 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 + +enum bnxt_qplib_wqe_mode { + BNXT_QPLIB_WQE_MODE_STATIC = 0x00, + BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01, + BNXT_QPLIB_WQE_MODE_INVALID = 0x02 +}; + +struct bnxt_qplib_drv_modes { + u8 wqe_mode; + /* Other modes to follow here */ +}; + +struct bnxt_qplib_chip_ctx { + u16 chip_num; + u8 chip_rev; + u8 chip_metal; + struct bnxt_qplib_drv_modes modes; +}; + #define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *)) #define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1) #define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG) @@ -141,6 +163,9 @@ struct bnxt_qplib_hwq { u32 cons; /* raw */ u8 cp_bit; u8 is_user; + u64 *pad_pg; + u32 pad_stride; + u32 pad_pgofft; }; struct bnxt_qplib_db_info { @@ -148,6 +173,7 @@ struct bnxt_qplib_db_info { void __iomem *priv_db; struct bnxt_qplib_hwq *hwq; u32 xid; + u32 max_slot; }; /* Tables */ @@ -230,16 +256,6 @@ struct bnxt_qplib_ctx { u64 hwrm_intf_ver; }; -struct bnxt_qplib_chip_ctx { - u16 chip_num; - u8 chip_rev; - u8 chip_metal; -}; - -#define CHIP_NUM_57508 0x1750 -#define CHIP_NUM_57504 0x1751 -#define CHIP_NUM_57502 0x1752 - struct bnxt_qplib_res { struct pci_dev *pdev; struct bnxt_qplib_chip_ctx *cctx; @@ -317,6 +333,14 @@ static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq, return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); } +static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx) +{ + idx += hwq->prod; + if (idx >= hwq->depth) + idx -= hwq->depth; + return bnxt_qplib_get_qe(hwq, idx, NULL); +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -351,6 +375,17 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx, bool virt_fn, bool is_p5); +static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) +{ + hwq->prod = (hwq->prod + cnt) % hwq->depth; +} + +static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq, + u32 cnt) +{ + hwq->cons = (hwq->cons + cnt) % hwq->depth; +} + static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, bool arm) { @@ -383,8 +418,7 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; key <<= 32; - key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & - DBC_DBC_INDEX_MASK; + key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; writeq(key, info->db); } diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 6f00f07420b7..3e40e0d76efd 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1126,6 +1126,7 @@ struct cmdq_create_qp { #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL + #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL u8 type; #define CMDQ_CREATE_QP_TYPE_RC 0x2UL #define CMDQ_CREATE_QP_TYPE_UD 0x4UL diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index e8e11bd95e42..2b2b009b371a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -980,7 +980,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); @@ -1053,8 +1053,9 @@ int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); -typedef int c4iw_restrack_func(struct sk_buff *msg, - struct rdma_restrack_entry *res); -extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp); +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 962dc97a8ff2..73936c3341b7 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; mhp->ibmr.length = mhp->attr.len; - mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); @@ -691,7 +690,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) } struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct c4iw_dev *rhp; struct c4iw_pd *php; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index ba83d942997c..6c579d2d3997 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -236,14 +236,6 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) return 0; } -static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - pr_debug("ibdev %p\n", ibdev); - *pkey = 0; - return 0; -} - static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { @@ -317,7 +309,6 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->max_msg_sz = -1; return ret; @@ -439,7 +430,6 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -458,13 +448,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } -static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) -{ - return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && - c4iw_restrack_funcs[res->type]) ? - c4iw_restrack_funcs[res->type](msg, res) : 0; -} - static const struct ib_device_ops c4iw_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_CXGB4, @@ -485,7 +468,9 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_cq = c4iw_destroy_cq, .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, - .fill_res_entry = fill_res_entry, + .fill_res_cq_entry = c4iw_fill_res_cq_entry, + .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry, + .fill_res_mr_entry = c4iw_fill_res_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, @@ -508,7 +493,6 @@ static const struct ib_device_ops c4iw_dev_ops = { .post_srq_recv = c4iw_post_srq_recv, .query_device = c4iw_query_device, .query_gid = c4iw_query_gid, - .query_pkey = c4iw_query_pkey, .query_port = c4iw_query_port, .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index f82d46ed969d..b32e6516d65f 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -134,10 +134,8 @@ err: return -EMSGSIZE; } -static int fill_res_qp_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp) { - struct ib_qp *ibqp = container_of(res, struct ib_qp, res); struct t4_swsqe *fsp = NULL, *lsp = NULL; struct c4iw_qp *qhp = to_c4iw_qp(ibqp); u16 first_sq_idx = 0, last_sq_idx = 0; @@ -195,10 +193,9 @@ union union_ep { struct c4iw_ep ep; }; -static int fill_res_ep_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, + struct rdma_cm_id *cm_id) { - struct rdma_cm_id *cm_id = rdma_res_to_id(res); struct nlattr *table_attr; struct c4iw_ep_common *epcp; struct c4iw_listen_ep *listen_ep = NULL; @@ -372,10 +369,8 @@ err: return -EMSGSIZE; } -static int fill_res_cq_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq) { - struct ib_cq *ibcq = container_of(res, struct ib_cq, res); struct c4iw_cq *chp = to_c4iw_cq(ibcq); struct nlattr *table_attr; struct t4_cqe hwcqes[2]; @@ -433,10 +428,8 @@ err: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct c4iw_mr *mhp = to_c4iw_mr(ibmr); struct c4iw_dev *dev = mhp->rhp; u32 stag = mhp->attr.stag; @@ -492,10 +485,3 @@ err_cancel_table: err: return -EMSGSIZE; } - -c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { - [RDMA_RESTRACK_QP] = fill_res_qp_entry, - [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, - [RDMA_RESTRACK_CQ] = fill_res_cq_entry, - [RDMA_RESTRACK_MR] = fill_res_mr_entry, -}; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index bef2bd291054..5484b08bbc5d 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -606,8 +606,8 @@ struct efa_admin_feature_queue_attr_desc { /* Number of sub-CQs to be created for each CQ */ u16 sub_cqs_per_cq; - /* MBZ */ - u16 reserved; + /* Minimum number of WQEs per SQ */ + u16 min_sq_depth; /* Maximum number of SGEs (buffers) allowed for a single send WQE */ u16 max_wr_send_sges; @@ -632,6 +632,17 @@ struct efa_admin_feature_queue_attr_desc { /* Maximum number of SGEs for a single RDMA read WQE */ u16 max_wr_rdma_sges; + + /* + * Maximum number of bytes that can be written to SQ between two + * consecutive doorbells (in units of 64B). Driver must ensure that only + * complete WQEs are written to queue before issuing a doorbell. + * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can + * be written to SQ between two consecutive doorbells. max_tx_batch=11 + * and WQE size = 128B, means up to 5 WQEs can be written to SQ between + * two consecutive doorbells. Zero means unlimited. + */ + u16 max_tx_batch; }; struct efa_admin_feature_aenq_desc { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index fabd8df2e78f..6ac23627f65a 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -480,6 +480,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; + result->max_tx_batch = resp.u.queue_attr.max_tx_batch; + result->min_sq_depth = resp.u.queue_attr.min_sq_depth; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 41ce4a476ee6..190bac23f585 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -127,6 +127,8 @@ struct efa_com_get_device_attr_result { u16 max_sq_sge; u16 max_rq_sge; u16 max_wr_rdma_sge; + u16 max_tx_batch; + u16 min_sq_depth; u8 db_bar; }; diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 82145574c928..92d701146320 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -12,10 +12,12 @@ #include "efa.h" -#define PCI_DEV_ID_EFA_VF 0xefa0 +#define PCI_DEV_ID_EFA0_VF 0xefa0 +#define PCI_DEV_ID_EFA1_VF 0xefa1 static const struct pci_device_id efa_pci_tbl[] = { - { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, { } }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 7dd082441333..9e201f169289 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1502,11 +1502,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) return efa_com_dealloc_uar(&dev->edev, ¶ms); } +#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ + (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ + NULL : #_attr) + +static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, + const struct efa_ibv_alloc_ucontext_cmd *cmd) +{ + struct efa_dev *dev = to_edev(ibucontext->device); + char *attr_str; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) + goto err; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, + attr_str)) + goto err; + + return 0; + +err: + ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", + attr_str); + return -EOPNOTSUPP; +} + int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); struct efa_ibv_alloc_ucontext_resp resp = {}; + struct efa_ibv_alloc_ucontext_cmd cmd = {}; struct efa_com_alloc_uar_result result; int err; @@ -1515,6 +1543,18 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) * we will ack input fields in our response. */ + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Cannot copy udata for alloc_ucontext\n"); + goto err_out; + } + + err = efa_user_comp_handshake(ibucontext, &cmd); + if (err) + goto err_out; + err = efa_com_alloc_uar(&dev->edev, &result); if (err) goto err_out; @@ -1526,6 +1566,8 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; resp.inline_buf_size = dev->dev_attr.inline_buf_size; resp.max_llq_size = dev->dev_attr.max_llq_size; + resp.max_tx_batch = dev->dev_attr.max_tx_batch; + resp.min_sq_wr = dev->dev_attr.min_sq_depth; if (udata && udata->outlen) { err = ib_copy_to_udata(udata, &resp, diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 15f9c635f292..7eaf99538216 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7317,11 +7317,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width) case 1: return OPA_LINK_WIDTH_1X; case 2: return OPA_LINK_WIDTH_2X; case 3: return OPA_LINK_WIDTH_3X; + case 4: return OPA_LINK_WIDTH_4X; default: dd_dev_info(dd, "%s: invalid width %d, using 4\n", __func__, width); - /* fall through */ - case 4: return OPA_LINK_WIDTH_4X; + return OPA_LINK_WIDTH_4X; } } @@ -7376,12 +7376,13 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, case 0: dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G; break; + case 1: + dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; + break; default: dd_dev_err(dd, "%s: unexpected max rate %d, using 25Gb\n", __func__, (int)max_rate); - /* fall through */ - case 1: dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; break; } @@ -12878,11 +12879,6 @@ bail: static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) { switch (chip_lstate) { - default: - dd_dev_err(dd, - "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", - chip_lstate); - /* fall through */ case LSTATE_DOWN: return IB_PORT_DOWN; case LSTATE_INIT: @@ -12891,6 +12887,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) return IB_PORT_ARMED; case LSTATE_ACTIVE: return IB_PORT_ACTIVE; + default: + dd_dev_err(dd, + "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", + chip_lstate); + return IB_PORT_DOWN; } } @@ -12898,10 +12899,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) { /* look at the HFI meta-states only */ switch (chip_pstate & 0xf0) { - default: - dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", - chip_pstate); - /* fall through */ case PLS_DISABLED: return IB_PORTPHYSSTATE_DISABLED; case PLS_OFFLINE: @@ -12914,6 +12911,10 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) return IB_PORTPHYSSTATE_LINKUP; case PLS_PHYTEST: return IB_PORTPHYSSTATE_PHY_TEST; + default: + dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", + chip_pstate); + return IB_PORTPHYSSTATE_DISABLED; } } diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 2b57ba70ddd6..0e83d4b61e46 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1868,11 +1868,8 @@ int parse_platform_config(struct hfi1_devdata *dd) 2; break; case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: pcfgcache->config_tables[table_type].num_table = table_length_dwords; @@ -1890,15 +1887,10 @@ int parse_platform_config(struct hfi1_devdata *dd) /* metadata table */ switch (table_type) { case PLATFORM_CONFIG_SYSTEM_TABLE: - /* fall through */ case PLATFORM_CONFIG_PORT_TABLE: - /* fall through */ case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: @@ -2027,15 +2019,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, switch (table) { case PLATFORM_CONFIG_SYSTEM_TABLE: - /* fall through */ case PLATFORM_CONFIG_PORT_TABLE: - /* fall through */ case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: if (field && field < platform_config_table_limits[table]) src_ptr = @@ -2138,11 +2125,8 @@ int get_platform_config_field(struct hfi1_devdata *dd, pcfgcache->config_tables[table_type].table; break; case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: src_ptr = pcfgcache->config_tables[table_type].table; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 7073f237a949..3222e3acb79c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -721,7 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, /* Bad mkey not a violation below level 2 */ if (ibp->rvp.mkeyprot < 2) break; - /* fall through */ + fallthrough; case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: if (ibp->rvp.mkey_violations != 0xFFFF) @@ -1272,7 +1272,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, case IB_PORT_NOP: if (phys_state == IB_PORTPHYSSTATE_NOP) break; - /* FALLTHROUGH */ + fallthrough; case IB_PORT_DOWN: if (phys_state == IB_PORTPHYSSTATE_NOP) { link_state = HLS_DN_DOWNDEF; @@ -2300,7 +2300,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, * can be changed from the default values */ case OPA_VLARB_PREEMPT_ELEMENTS: - /* FALLTHROUGH */ case OPA_VLARB_PREEMPT_MATRIX: smp->status |= IB_SMP_UNSUP_METH_ATTR; break; @@ -4170,7 +4169,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; - /* FALLTHROUGH */ + fallthrough; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); @@ -4240,7 +4239,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; - /* FALLTHROUGH */ + fallthrough; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 1a6268d61977..18d32f053d26 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -306,7 +306,7 @@ int pcie_speeds(struct hfi1_devdata *dd) ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); if (ret) { dd_dev_err(dd, "Unable to read from PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) { @@ -334,10 +334,14 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/* restore command and BARs after a reset has wiped them out */ +/** + * Restore command and BARs after a reset has wiped them out + * + * Returns 0 on success, otherwise a negative error value + */ int restore_pci_variables(struct hfi1_devdata *dd) { - int ret = 0; + int ret; ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); if (ret) @@ -386,13 +390,17 @@ int restore_pci_variables(struct hfi1_devdata *dd) error: dd_dev_err(dd, "Unable to write to PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } -/* Save BARs and command to rewrite after device reset */ +/** + * Save BARs and command to rewrite after device reset + * + * Returns 0 on success, otherwise a negative error value + */ int save_pci_variables(struct hfi1_devdata *dd) { - int ret = 0; + int ret; ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); @@ -441,7 +449,7 @@ int save_pci_variables(struct hfi1_devdata *dd) error: dd_dev_err(dd, "Unable to read from PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } /* diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 79126b2b14ab..ff864f6f0266 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -86,7 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) switch (op) { case PSC_GLOBAL_ENABLE: reg |= SEND_CTRL_SEND_ENABLE_SMASK; - /* Fall through */ + fallthrough; case PSC_DATA_VL_ENABLE: mask = 0; for (i = 0; i < ARRAY_SIZE(dd->vld); i++) diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 03024cec78dd..b12e4665c9ab 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -191,22 +191,22 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; - /* fall through */ + fallthrough; case 6: *dest++ = *src++; - /* fall through */ + fallthrough; case 5: *dest++ = *src++; - /* fall through */ + fallthrough; case 4: *dest++ = *src++; - /* fall through */ + fallthrough; case 3: *dest++ = *src++; - /* fall through */ + fallthrough; case 2: *dest++ = *src++; - /* fall through */ + fallthrough; case 1: *dest++ = *src++; /* fall through */ diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 36593f2efe26..4642d6ceb890 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -668,8 +668,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) /* active optical cables only */ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { - case 0x0 ... 0x9: /* fallthrough */ - case 0xC: /* fallthrough */ + case 0x0 ... 0x9: fallthrough; + case 0xC: fallthrough; case 0xE: /* active AOC */ power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); @@ -899,8 +899,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, *ptr_tuning_method = OPA_PASSIVE_TUNING; break; - case 0x0 ... 0x9: /* fallthrough */ - case 0xC: /* fallthrough */ + case 0x0 ... 0x9: fallthrough; + case 0xC: fallthrough; case 0xE: ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset, ptr_total_atten); @@ -909,7 +909,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, *ptr_tuning_method = OPA_ACTIVE_TUNING; break; - case 0xD: /* fallthrough */ + case 0xD: fallthrough; case 0xF: default: dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index be62284e42d9..356518e17fa6 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -312,7 +312,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) switch (qp->ibqp.qp_type) { case IB_QPT_RC: hfi1_setup_tid_rdma_wqe(qp, wqe); - /* fall through */ + fallthrough; case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b670321365d3..b0d053d12129 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -113,20 +113,6 @@ static inline void clear_ahg(struct rvt_qp *qp) } /** - * hfi1_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: user data for libibverbs.so - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. - */ -struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -/** * hfi1_qp_wakeup - wake up on the indicated event * @qp: the QP * @flag: flag the qp on which the qp is stalled diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index b5966991d647..8386c84c2d92 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -231,7 +231,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, break; case 2: offset_bytes[1] = (offset >> 8) & 0xff; - /* fall through */ + fallthrough; case 1: num_msgs = 2; offset_bytes[0] = offset & 0xff; @@ -279,7 +279,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, break; case 2: offset_bytes[1] = (offset >> 8) & 0xff; - /* fall through */ + fallthrough; case 1: num_msgs = 2; offset_bytes[0] = offset & 0xff; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index f1734e5e9ac4..1bb5f57152d3 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -141,7 +141,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; release_rdma_sge_mr(e); - /* FALLTHROUGH */ + fallthrough; case OP(ATOMIC_ACKNOWLEDGE): /* * We can increment the tail pointer now that the last @@ -160,7 +160,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_acked_ack_queue = next; qp->s_tail_ack_queue = next; trace_hfi1_rsp_make_rc_ack(qp, e->psn); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_ONLY): case OP(ACKNOWLEDGE): /* Check for no next entry in the queue. */ @@ -267,7 +267,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_FIRST): qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_READ_RESPONSE_MIDDLE): ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; @@ -881,8 +881,7 @@ no_flow_control: goto bail; } qp->s_num_rd_atomic++; - - /* FALLTHROUGH */ + fallthrough; case IB_WR_OPFN: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; @@ -946,10 +945,10 @@ no_flow_control: * See restart_rc(). */ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_FIRST): qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); ss = &qp->s_sge; @@ -991,10 +990,10 @@ no_flow_control: * See restart_rc(). */ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_FIRST): qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); ss = &qp->s_sge; @@ -2901,7 +2900,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (!ret) goto rnr_nak; qp->r_rcv_len = 0; - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): case OP(RDMA_WRITE_MIDDLE): send_middle: @@ -2941,7 +2940,7 @@ send_middle: goto no_immediate_data; if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) goto send_last_inv; - /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ + fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; @@ -2957,7 +2956,7 @@ send_last_inv: goto send_last; case OP(RDMA_WRITE_LAST): copy_last = rvt_is_user_qp(qp); - /* fall through */ + fallthrough; case OP(SEND_LAST): no_immediate_data: wc.wc_flags = 0; @@ -3010,7 +3009,7 @@ send_last: case OP(RDMA_WRITE_ONLY): copy_last = rvt_is_user_qp(qp); - /* fall through */ + fallthrough; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index c93ea021cf49..04575c9afd61 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2584,7 +2584,7 @@ static void __sdma_process_event(struct sdma_engine *sde, * 7220, e.g. */ ss->go_s99_running = 1; - /* fall through -- and start dma engine */ + fallthrough; /* and start dma engine */ case sdma_event_e10_go_hw_start: /* This reference means the state machine is started */ sdma_get(&sde->state); @@ -2726,7 +2726,6 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e70_go_idle: break; case sdma_event_e85_link_down: - /* fall through */ case sdma_event_e80_hw_freeze: sdma_set_state(sde, sdma_state_s80_hw_freeze); atomic_dec(&sde->dd->sdma_unfreeze_count); @@ -3007,7 +3006,7 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e60_hw_halted: need_progress = 1; sdma_err_progress_check_schedule(sde); - /* fall through */ + fallthrough; case sdma_event_e90_sw_halted: /* * SW initiated halt does not perform engines @@ -3021,7 +3020,7 @@ static void __sdma_process_event(struct sdma_engine *sde, break; case sdma_event_e85_link_down: ss->go_s99_running = 0; - /* fall through */ + fallthrough; case sdma_event_e80_hw_freeze: sdma_set_state(sde, sdma_state_s80_hw_freeze); atomic_dec(&sde->dd->sdma_unfreeze_count); @@ -3252,7 +3251,7 @@ void _sdma_txreq_ahgadd( tx->num_desc++; tx->descs[2].qw[0] = 0; tx->descs[2].qw[1] = 0; - /* FALLTHROUGH */ + fallthrough; case SDMA_AHG_APPLY_UPDATE2: tx->num_desc++; tx->descs[1].qw[0] = 0; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index facff133139a..9af82ff933d7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3227,7 +3227,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) case IB_WR_RDMA_READ: if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE) break; - /* fall through */ + fallthrough; case IB_WR_TID_RDMA_READ: switch (prev->wr.opcode) { case IB_WR_RDMA_READ: @@ -5067,7 +5067,7 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (priv->s_state == TID_OP(WRITE_REQ)) hfi1_tid_rdma_restart_req(qp, wqe, &bth2); priv->s_state = TID_OP(WRITE_DATA); - /* fall through */ + fallthrough; case TID_OP(WRITE_DATA): /* diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0c77f18120ed..1fb918399da0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -216,7 +216,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case OP(SEND_FIRST): qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): len = qp->s_len; if (len > pmtu) { @@ -241,7 +241,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case OP(RDMA_WRITE_FIRST): qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): len = qp->s_len; if (len > pmtu) { @@ -414,7 +414,7 @@ send_first: goto no_immediate_data; else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) goto send_last_imm; - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ /* @@ -515,7 +515,7 @@ rdma_first: wc.ex.imm_data = ohdr->u.rc.imm_data; goto rdma_last_imm; } - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 479fa557993e..da9888deff8c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,9 +37,8 @@ #define DRV_NAME "hns_roce" -/* hip08 is a pci device, it includes two version according pci version id */ -#define PCI_REVISION_ID_HIP08_A 0x20 -#define PCI_REVISION_ID_HIP08_B 0x21 +/* hip08 is a pci device */ +#define PCI_REVISION_ID_HIP08 0x21 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') @@ -348,20 +347,22 @@ struct hns_roce_buf_attr { bool mtt_only; /* only alloc buffer-required MTT memory */ }; +struct hns_roce_hem_cfg { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + unsigned int ba_pg_shift; /* BA table page shift */ + unsigned int buf_pg_shift; /* buffer page shift */ + unsigned int buf_pg_count; /* buffer page count */ + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; + int region_count; +}; + /* memory translate region */ struct hns_roce_mtr { struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ struct ib_umem *umem; /* user space buffer */ struct hns_roce_buf *kmem; /* kernel space buffer */ - struct { - dma_addr_t root_ba; /* root BA table's address */ - bool is_direct; /* addressing without BA table */ - unsigned int ba_pg_shift; /* BA table page shift */ - unsigned int buf_pg_shift; /* buffer page shift */ - int buf_pg_count; /* buffer page count */ - struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - unsigned int region_count; - } hem_cfg; /* config for hardware addressing */ + struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ }; struct hns_roce_mw { @@ -1192,7 +1193,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); @@ -1267,6 +1268,6 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); -int hns_roce_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index cf39f560b800..07b4c85d341d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2483,7 +2483,6 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) { struct ib_device *ibdev = &hr_dev->ib_dev; - int rq_pa_start; int count; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); @@ -2491,9 +2490,9 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibdev_err(ibdev, "Failed to find SQ ba\n"); return -ENOBUFS; } - rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, - NULL); + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, + 1, NULL); if (!count) { ibdev_err(ibdev, "Failed to find RQ ba\n"); return -ENOBUFS; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0618ced45bf8..d2968594664b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -91,10 +91,11 @@ static u32 to_hr_opcode(u32 ib_opcode) } static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - void *wqe, const struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { + struct hns_roce_wqe_frmr_seg *fseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); struct hns_roce_mr *mr = to_hr_mr(wr->mr); - struct hns_roce_wqe_frmr_seg *fseg = wqe; u64 pbl_ba; /* use ib_access_flags */ @@ -128,14 +129,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } -static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, +static void set_atomic_seg(const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, unsigned int valid_num_sge) { - struct hns_roce_wqe_atomic_seg *aseg; + struct hns_roce_v2_wqe_data_seg *dseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); + struct hns_roce_wqe_atomic_seg *aseg = + (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg); - set_data_seg_v2(wqe, wr->sg_list); - aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg); + set_data_seg_v2(dseg, wr->sg_list); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap); @@ -143,7 +146,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, } else { aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->compare_add); - aseg->cmp_data = 0; + aseg->cmp_data = 0; } roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, @@ -176,13 +179,15 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - void *wqe, unsigned int *sge_ind, + unsigned int *sge_ind, unsigned int valid_num_sge) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_v2_wqe_data_seg *dseg = wqe; + struct hns_roce_v2_wqe_data_seg *dseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); + void *wqe = dseg; int j = 0; int i; @@ -438,7 +443,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); switch (wr->opcode) { case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: @@ -451,7 +455,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr)); + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: @@ -468,10 +472,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge); + set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); else if (wr->opcode != IB_WR_REG_MR) ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, - wqe, &curr_idx, valid_num_sge); + &curr_idx, valid_num_sge); *sge_idx = curr_idx; @@ -1510,8 +1514,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - memset(req_a, 0, sizeof(*req_a)); - memset(req_b, 0, sizeof(*req_b)); for (i = 0; i < 2; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], HNS_ROCE_OPC_ALLOC_VF_RES, false); @@ -1744,27 +1746,25 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | - HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | - HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | + HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; - caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; - caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; - caps->qpc_timer_ba_pg_sz = 0; - caps->qpc_timer_buf_pg_sz = 0; - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; - caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; - caps->cqc_timer_ba_pg_sz = 0; - caps->cqc_timer_buf_pg_sz = 0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->qpc_timer_ba_pg_sz = 0; + caps->qpc_timer_buf_pg_sz = 0; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->cqc_timer_ba_pg_sz = 0; + caps->cqc_timer_buf_pg_sz = 0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; - caps->sccc_ba_pg_sz = 0; - caps->sccc_buf_pg_sz = 0; - caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; - } + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; } static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, @@ -1995,20 +1995,18 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->srqc_bt_num, &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - caps->sccc_hop_num = ctx_hop_num; - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->sccc_hop_num = ctx_hop_num; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, - caps->sccc_hop_num, caps->sccc_bt_num, - &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, - HEM_TYPE_SCCC); - calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, - caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, - &caps->cqc_timer_buf_pg_sz, - &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); - } + calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + caps->sccc_hop_num, caps->sccc_bt_num, + &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, + HEM_TYPE_SCCC); + calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, + caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, + &caps->cqc_timer_buf_pg_sz, + &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num, 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE); @@ -2055,22 +2053,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - ret = hns_roce_query_pf_timer_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, - "Query pf timer resource fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_query_pf_timer_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, + "failed to query pf timer resource, ret = %d.\n", ret); + return ret; + } - ret = hns_roce_set_vf_switch_param(hr_dev, 0); - if (ret) { - dev_err(hr_dev->dev, - "Set function switch param fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "failed to set function switch param, ret = %d.\n", + ret); + return ret; } hr_dev->vendor_part_id = hr_dev->pci_dev->device; @@ -2336,8 +2331,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) - hns_roce_function_clear(hr_dev); + hns_roce_function_clear(hr_dev); hns_roce_free_link_table(hr_dev, &priv->tpq); hns_roce_free_link_table(hr_dev, &priv->tsq); @@ -3053,6 +3047,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, IB_WC_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, + { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR} }; u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, @@ -3075,6 +3070,14 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, sizeof(*cqe), false); /* + * For hns ROCEE, GENERAL_ERR is an error type that is not defined in + * the standard protocol, the driver must ignore it and needn't to set + * the QP to an error state. + */ + if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR) + return; + + /* * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets * into errored mode. Hence, as a workaround to this hardware * limitation, driver needs to assist in flushing. But the flushing @@ -3170,51 +3173,51 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, /* SQ corresponding to CQE */ switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M, V2_CQE_BYTE_4_OPCODE_S) & 0x1f) { - case HNS_ROCE_SQ_OPCODE_SEND: + case HNS_ROCE_V2_WQE_OP_SEND: wc->opcode = IB_WC_SEND; break; - case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV: + case HNS_ROCE_V2_WQE_OP_SEND_WITH_INV: wc->opcode = IB_WC_SEND; break; - case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM: + case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM: wc->opcode = IB_WC_SEND; wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_SQ_OPCODE_RDMA_READ: + case HNS_ROCE_V2_WQE_OP_RDMA_READ: wc->opcode = IB_WC_RDMA_READ; wc->byte_len = le32_to_cpu(cqe->byte_cnt); break; - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE: + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM: + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: wc->opcode = IB_WC_RDMA_WRITE; wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_SQ_OPCODE_LOCAL_INV: + case HNS_ROCE_V2_WQE_OP_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; wc->wc_flags |= IB_WC_WITH_INVALIDATE; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: wc->opcode = IB_WC_COMP_SWAP; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: wc->opcode = IB_WC_MASKED_COMP_SWAP; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD: wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_FAST_REG_WR: + case HNS_ROCE_V2_WQE_OP_FAST_REG_PMR: wc->opcode = IB_WC_REG_MR; break; - case HNS_ROCE_SQ_OPCODE_BIND_MW: + case HNS_ROCE_V2_WQE_OP_BIND_MW: wc->opcode = IB_WC_REG_MR; break; default: @@ -3374,11 +3377,33 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, return op + step_idx; } +static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, + u32 hem_type, int step_idx) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + int op; + + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); + if (op < 0) + return 0; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, + 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) { - struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_iter iter; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; @@ -3390,7 +3415,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, u64 bt_ba = 0; u32 chunk_ba_num; u32 hop_num; - int op; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) return 0; @@ -3412,14 +3436,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, hem_idx = i; } - op = get_op_for_set_hem(hr_dev, table->type, step_idx); - if (op == -EINVAL) - return 0; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - if (table->type == HEM_TYPE_SCCC) obj = mhop.l0_idx; @@ -3428,11 +3444,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, for (hns_roce_hem_first(hem, &iter); !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { bt_ba = hns_roce_hem_addr(&iter); - - /* configure the ba, tag, and op */ - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, - obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, + step_idx); } } else { if (step_idx == 0) @@ -3440,12 +3453,9 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, else if (step_idx == 1 && hop_num == 2) bt_ba = table->bt_l1_dma_addr[l1_idx]; - /* configure the ba, tag, and op */ - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, - 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx); } - hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; } @@ -3745,51 +3755,23 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, } } -static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, int mtt_cnt, - u32 page_size) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - - if (hr_qp->rq.wqe_cnt < 1) - return true; - - if (mtt_cnt < 1) { - ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n", - hr_qp->qpn); - return false; - } - - if (mtt_cnt < MTT_MIN_COUNT && - (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { - ibdev_err(ibdev, - "failed to find next RQWQE buf ba of QP(0x%lx)\n", - hr_qp->qpn); - return false; - } - - return true; -} - static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - struct ib_qp *ibqp = &hr_qp->ibqp; u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; - u32 page_size; int count; /* Search qp buf's mtts */ - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->rq.offset / page_size, mtts, + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, MTT_MIN_COUNT, &wqe_sge_ba); - if (!ibqp->srq) - if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) - return -EINVAL; + if (hr_qp->rq.wqe_cnt && count < 1) { + ibdev_err(&hr_dev->ib_dev, + "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn); + return -EINVAL; + } context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; @@ -3891,7 +3873,6 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; - u32 page_size; int count; /* search qp buf's mtts */ @@ -3902,9 +3883,8 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, return -EINVAL; } if (hr_qp->sge.sge_cnt > 0) { - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset / page_size, + hr_qp->sge.offset, &sge_cur_blk, 1, NULL); if (count < 1) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", @@ -4265,12 +4245,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp) + if (is_udp) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); else roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, @@ -4301,7 +4282,9 @@ static bool check_qp_state(enum ib_qp_state cur_state, [IB_QPS_RTR] = { [IB_QPS_RESET] = true, [IB_QPS_RTS] = true, [IB_QPS_ERR] = true }, - [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, [IB_QPS_SQD] = {}, [IB_QPS_SQE] = {}, [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e176b0aaa4ac..1fb1c583d0f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -179,27 +179,11 @@ enum { HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, - HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc, + HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, HNS_ROCE_V2_WQE_OP_MASK = 0x1f, }; enum { - HNS_ROCE_SQ_OPCODE_SEND = 0x0, - HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1, - HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2, - HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3, - HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4, - HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5, - HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6, - HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7, - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8, - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9, - HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa, - HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb, - HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc, -}; - -enum { /* rq operations */ HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0, HNS_ROCE_V2_OPCODE_SEND = 0x1, @@ -230,6 +214,7 @@ enum { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15, HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16, HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22, + HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23, HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 50763cf4fa3d..5907cfd878a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -428,7 +428,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, - .fill_res_entry = hns_roce_fill_res_entry, + .fill_res_cq_entry = hns_roce_fill_res_cq_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6b226a5eb7db..e5df3884b41d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -415,7 +415,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; @@ -871,6 +871,15 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int err; int i; + /* + * Only use the first page address as root ba when hopnum is 0, this + * is because the addresses of all pages are consecutive in this case. + */ + if (mtr->hem_cfg.is_direct) { + mtr->hem_cfg.root_ba = pages[0]; + return 0; + } + for (i = 0; i < mtr->hem_cfg.region_count; i++) { r = &mtr->hem_cfg.region[i]; if (r->offset + r->count > page_cnt) { @@ -896,6 +905,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int start_index; int mtt_count; int total = 0; __le64 *mtts; @@ -907,26 +918,32 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, goto done; /* no mtt memory in direct mode, so just return the buffer address */ - if (mtr->hem_cfg.is_direct) { - npage = offset; - for (total = 0; total < mtt_max; total++, npage++) { - addr = mtr->hem_cfg.root_ba + - (npage << mtr->hem_cfg.buf_pg_shift); - + if (cfg->is_direct) { + start_index = offset >> HNS_HW_PAGE_SHIFT; + for (mtt_count = 0; mtt_count < cfg->region_count && + total < mtt_max; mtt_count++) { + npage = cfg->region[mtt_count].offset; + if (npage < start_index) + continue; + + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) mtt_buf[total] = to_hr_hw_page_addr(addr); else mtt_buf[total] = addr; + + total++; } goto done; } + start_index = offset >> cfg->buf_pg_shift; left = mtt_max; while (left > 0) { mtt_count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset + total, + start_index + total, &mtt_count, NULL); if (!mtts || !mtt_count) goto done; @@ -939,104 +956,136 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, done: if (base_addr) - *base_addr = mtr->hem_cfg.root_ba; + *base_addr = cfg->root_ba; return total; } -/* convert buffer size to page index and page count */ -static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr, - int page_cnt, - struct hns_roce_buf_region *regions, - int region_cnt, unsigned int page_shift) +static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *attr, + struct hns_roce_hem_cfg *cfg, + unsigned int *buf_page_shift) { - unsigned int page_size = 1 << page_shift; - int max_region = attr->region_count; struct hns_roce_buf_region *r; - unsigned int i = 0; - int page_idx = 0; - - for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { - r = ®ions[i]; - r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? - 0 : attr->region[i].hopnum; - r->offset = page_idx; - r->count = DIV_ROUND_UP(attr->region[i].size, page_size); - page_idx += r->count; + unsigned int page_shift = 0; + int page_cnt = 0; + size_t buf_size; + int region_cnt; + + if (cfg->is_direct) { + buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; + page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); + /* + * When HEM buffer use level-0 addressing, the page size equals + * the buffer size, and the the page size = 4K * 2^N. + */ + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); + if (attr->region_count > 1) { + cfg->buf_pg_count = page_cnt; + page_shift = HNS_HW_PAGE_SHIFT; + } else { + cfg->buf_pg_count = 1; + page_shift = cfg->buf_pg_shift; + if (buf_size != 1 << page_shift) { + ibdev_err(&hr_dev->ib_dev, + "failed to check direct size %zu shift %d.\n", + buf_size, page_shift); + return -EINVAL; + } + } + } else { + page_shift = cfg->buf_pg_shift; + } + + /* convert buffer size to page index and page count */ + for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && + region_cnt < attr->region_count && + region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { + r = &cfg->region[region_cnt]; + r->offset = page_cnt; + buf_size = hr_hw_page_align(attr->region[region_cnt].size); + r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + page_cnt += r->count; + r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, + r->count); + } + + if (region_cnt < 1) { + ibdev_err(&hr_dev->ib_dev, + "failed to check mtr region count, pages = %d.\n", + cfg->buf_pg_count); + return -ENOBUFS; } - return i; + cfg->region_count = region_cnt; + *buf_page_shift = page_shift; + + return page_cnt; } /** * hns_roce_mtr_create - Create hns memory translate region. * * @mtr: memory translate region - * @init_attr: init attribute for creating mtr - * @page_shift: page shift for multi-hop base address table + * @buf_attr: buffer attribute for creating mtr + * @ba_page_shift: page shift for multi-hop base address table * @udata: user space context, if it's NULL, means kernel space * @user_addr: userspace virtual address to start at - * @buf_alloced: mtr has private buffer, true means need to alloc */ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, - unsigned int page_shift, struct ib_udata *udata, + unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned int buf_page_shift = 0; dma_addr_t *pages = NULL; - int region_cnt = 0; int all_pg_cnt; int get_pg_cnt; - bool has_mtt; - int err = 0; + int ret = 0; + + /* if disable mtt, all pages must in a continuous address range */ + cfg->is_direct = !mtr_has_mtt(buf_attr); - has_mtt = mtr_has_mtt(buf_attr); /* if buffer only need mtt, just init the hem cfg */ if (buf_attr->mtt_only) { - mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; - mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; + cfg->buf_pg_shift = buf_attr->page_shift; + cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; mtr->umem = NULL; mtr->kmem = NULL; } else { - err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, - user_addr); - if (err) { - ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", - err); - return err; + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, + udata, user_addr); + if (ret) { + ibdev_err(ibdev, + "failed to alloc mtr bufs, ret = %d.\n", ret); + return ret; } } - /* alloc mtt memory */ - all_pg_cnt = mtr->hem_cfg.buf_pg_count; - hns_roce_hem_list_init(&mtr->hem_list); - mtr->hem_cfg.is_direct = !has_mtt; - mtr->hem_cfg.ba_pg_shift = page_shift; - mtr->hem_cfg.region_count = 0; - region_cnt = mtr_init_region(buf_attr, all_pg_cnt, - mtr->hem_cfg.region, - ARRAY_SIZE(mtr->hem_cfg.region), - mtr->hem_cfg.buf_pg_shift); - if (region_cnt < 1) { - err = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt); + all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); + if (all_pg_cnt < 1) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); goto err_alloc_bufs; } - mtr->hem_cfg.region_count = region_cnt; - - if (has_mtt) { - err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - mtr->hem_cfg.region, region_cnt, - page_shift); - if (err) { - ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", - err); + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", + ret); goto err_alloc_bufs; } - mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; } /* no buffer to map */ @@ -1046,31 +1095,26 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* alloc a tmp array to store buffer's dma address */ pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); if (!pages) { - err = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + ret = -ENOMEM; + ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", all_pg_cnt); goto err_alloc_hem_list; } get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - mtr->hem_cfg.buf_pg_shift); + buf_page_shift); if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", get_pg_cnt, all_pg_cnt); - err = -ENOBUFS; + ret = -ENOBUFS; goto err_alloc_page_list; } - if (!has_mtt) { - mtr->hem_cfg.root_ba = pages[0]; - } else { - /* write buffer's dma address to BA table */ - err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); - if (err) { - ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", - err); - goto err_alloc_page_list; - } + /* write buffer's dma address to BA table */ + ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + if (ret) { + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + goto err_alloc_page_list; } /* drop tmp array */ @@ -1082,7 +1126,7 @@ err_alloc_hem_list: hns_roce_hem_list_release(hr_dev, &mtr->hem_list); err_alloc_bufs: mtr_free_bufs(hr_dev, mtr); - return err; + return ret; } void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a0a47bd66975..e94ca130ff5e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -411,7 +411,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) { - struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; cnt = max(1U, cap->max_send_sge); @@ -431,15 +430,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { cnt = roundup_pow_of_two(sq_wqe_cnt * (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (cnt > hr_dev->caps.max_extend_sg) { - ibdev_err(ibdev, - "failed to check exSGE num, exSGE num = %d.\n", - cnt); - return -EINVAL; - } - } } else { cnt = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 06871731ac43..259444c0a630 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -76,10 +76,9 @@ err: return -EMSGSIZE; } -static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct ib_cq *ib_cq) { - struct ib_cq *ib_cq = container_of(res, struct ib_cq, res); struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct hns_roce_v2_cq_context *context; @@ -119,12 +118,3 @@ err: kfree(context); return ret; } - -int hns_roce_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (res->type == RDMA_RESTRACK_CQ) - return hns_roce_fill_res_cq_entry(msg, res); - - return 0; -} diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 19af29a48c55..6957e4f3404b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -101,7 +101,6 @@ static int i40iw_query_port(struct ib_device *ibdev, props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->active_width = IB_WIDTH_4X; props->active_speed = 1; props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; @@ -1543,10 +1542,9 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages - * @udata: user data or NULL for kernel objects */ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); @@ -2460,7 +2458,6 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -2616,22 +2613,6 @@ static int i40iw_query_gid(struct ib_device *ibdev, return 0; } -/** - * i40iw_query_pkey - Query partition key - * @ibdev: device pointer from stack - * @port: port number - * @index: index of pkey - * @pkey: pointer to store the pkey - */ -static int i40iw_query_pkey(struct ib_device *ibdev, - u8 port, - u16 index, - u16 *pkey) -{ - *pkey = 0; - return 0; -} - static const struct ib_device_ops i40iw_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_I40IW, @@ -2671,7 +2652,6 @@ static const struct ib_device_ops i40iw_dev_ops = { .post_send = i40iw_post_send, .query_device = i40iw_query_device, .query_gid = i40iw_query_gid, - .query_pkey = i40iw_query_pkey, .query_port = i40iw_query_port, .query_qp = i40iw_query_qp, .reg_user_mr = i40iw_reg_user_mr, diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 816d28854a8e..5e7910a517da 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1219,56 +1219,47 @@ static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); } -static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) { - struct mlx4_ib_xrcd *xrcd; + struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd); struct ib_cq_init_attr cq_attr = {}; int err; - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) - return ERR_PTR(-ENOSYS); - - xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); - if (!xrcd) - return ERR_PTR(-ENOMEM); + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return -EOPNOTSUPP; - err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn); if (err) - goto err1; + return err; - xrcd->pd = ib_alloc_pd(ibdev, 0); + xrcd->pd = ib_alloc_pd(ibxrcd->device, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; } cq_attr.cqe = 1; - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); + xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; } - return &xrcd->ibxrcd; + return 0; err3: ib_dealloc_pd(xrcd->pd); err2: - mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); -err1: - kfree(xrcd); - return ERR_PTR(err); + mlx4_xrcd_free(dev->dev, xrcd->xrcdn); + return err; } -static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); - kfree(xrcd); - - return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) @@ -2607,6 +2598,8 @@ static const struct ib_device_ops mlx4_ib_dev_mw_ops = { static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { .alloc_xrcd = mlx4_ib_alloc_xrcd, .dealloc_xrcd = mlx4_ib_dealloc_xrcd, + + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd), }; static const struct ib_device_ops mlx4_ib_dev_fs_ops = { diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6f4ea1067095..38e87a700a2a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -729,7 +729,7 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 7e0b205c05eb..1d5ef0de12c9 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->ibmr.length = length; - mr->ibmr.iova = virt_addr; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; @@ -655,7 +654,7 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) } struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 8cca61c671f8..b4c009bb0db6 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := ah.o \ cmd.o \ cong.o \ + counters.o \ cq.o \ doorbell.o \ gsi.o \ @@ -22,5 +23,6 @@ mlx5_ib-y := ah.o \ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \ - flow.o \ - qos.o + fs.o \ + qos.o \ + std_types.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index cc24c711e92a..ebb2f108b64f 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -148,18 +148,6 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) spin_unlock(&dm->lock); } -int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) -{ - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); - return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, - 0, 0); -} - void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index f4d8558db434..1d192a8ca87d 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -41,7 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out); -int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c new file mode 100644 index 000000000000..145f3cb40ccb --- /dev/null +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -0,0 +1,709 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + */ + +#include "mlx5_ib.h" +#include <linux/mlx5/eswitch.h> +#include "counters.h" +#include "ib_rep.h" +#include "qp.h" + +struct mlx5_ib_counter { + const char *name; + size_t offset; +}; + +#define INIT_Q_COUNTER(_name) \ + { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} + +static const struct mlx5_ib_counter basic_q_cnts[] = { + INIT_Q_COUNTER(rx_write_requests), + INIT_Q_COUNTER(rx_read_requests), + INIT_Q_COUNTER(rx_atomic_requests), + INIT_Q_COUNTER(out_of_buffer), +}; + +static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { + INIT_Q_COUNTER(out_of_sequence), +}; + +static const struct mlx5_ib_counter retrans_q_cnts[] = { + INIT_Q_COUNTER(duplicate_request), + INIT_Q_COUNTER(rnr_nak_retry_err), + INIT_Q_COUNTER(packet_seq_err), + INIT_Q_COUNTER(implied_nak_seq_err), + INIT_Q_COUNTER(local_ack_timeout_err), +}; + +#define INIT_CONG_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} + +static const struct mlx5_ib_counter cong_cnts[] = { + INIT_CONG_COUNTER(rp_cnp_ignored), + INIT_CONG_COUNTER(rp_cnp_handled), + INIT_CONG_COUNTER(np_ecn_marked_roce_packets), + INIT_CONG_COUNTER(np_cnp_sent), +}; + +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + +static const struct mlx5_ib_counter roce_accl_cnts[] = { + INIT_Q_COUNTER(roce_adp_retrans), + INIT_Q_COUNTER(roce_adp_retrans_to), + INIT_Q_COUNTER(roce_slow_restart), + INIT_Q_COUNTER(roce_slow_restart_cnps), + INIT_Q_COUNTER(roce_slow_restart_trans), +}; + +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + +static int mlx5_ib_read_counters(struct ib_counters *counters, + struct ib_counters_read_attr *read_attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + struct mlx5_read_counters_attr mread_attr = {}; + struct mlx5_ib_flow_counters_desc *desc; + int ret, i; + + mutex_lock(&mcounters->mcntrs_mutex); + if (mcounters->cntrs_max_index > read_attr->ncounters) { + ret = -EINVAL; + goto err_bound; + } + + mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), + GFP_KERNEL); + if (!mread_attr.out) { + ret = -ENOMEM; + goto err_bound; + } + + mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; + mread_attr.flags = read_attr->flags; + ret = mcounters->read_counters(counters->device, &mread_attr); + if (ret) + goto err_read; + + /* do the pass over the counters data array to assign according to the + * descriptions and indexing pairs + */ + desc = mcounters->counters_data; + for (i = 0; i < mcounters->ncounters; i++) + read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; + +err_read: + kfree(mread_attr.out); +err_bound: + mutex_unlock(&mcounters->mcntrs_mutex); + return ret; +} + +static void mlx5_ib_destroy_counters(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mlx5_ib_counters_clear_description(counters); + if (mcounters->hw_cntrs_hndl) + mlx5_fc_destroy(to_mdev(counters->device)->mdev, + mcounters->hw_cntrs_hndl); +} + +static int mlx5_ib_create_counters(struct ib_counters *counters, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mutex_init(&mcounters->mcntrs_mutex); + return 0; +} + + +static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) +{ + return MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == + MLX5_ESWITCH_OFFLOADS; +} + +static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, + u8 port_num) +{ + return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : + &dev->port[port_num].cnts; +} + +/** + * mlx5_ib_get_counters_id - Returns counters id to use for device+port + * @dev: Pointer to mlx5 IB device + * @port_num: Zero based port number + * + * mlx5_ib_get_counters_id() Returns counters set id to use for given + * device port combination in switchdev and non switchdev mode of the + * parent device. + */ +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) +{ + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); + + return cnts->set_id; +} + +static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); + + if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) + return NULL; + + cnts = get_counters(dev, port_num - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats, + u16 set_id) +{ + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + __be32 val; + int ret, i; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); + ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); + if (ret) + return ret; + + for (i = 0; i < cnts->num_q_counters; i++) { + val = *(__be32 *)((void *)out + cnts->offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } + + return 0; +} + +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats) +{ + int offset = cnts->num_q_counters + cnts->num_cong_counters; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); + if (ret) + goto free; + + for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + cnts->offsets[i + offset])); +free: + kvfree(out); + return ret; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); + struct mlx5_core_dev *mdev; + int ret, num_counters; + u8 mdev_port_num; + + if (!stats) + return -EINVAL; + + num_counters = cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + + /* q_counters are per IB device, query the master mdev */ + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); + if (ret) + return ret; + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); + if (ret) + return ret; + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, + &mdev_port_num); + if (!mdev) { + /* If port is not affiliated yet, its in down state + * which doesn't have any counters yet, so it would be + * zero. So no need to read from the HCA. + */ + goto done; + } + ret = mlx5_lag_query_cong_counters(dev->mdev, + stats->value + + cnts->num_q_counters, + cnts->num_cong_counters, + cnts->offsets + + cnts->num_q_counters); + + mlx5_ib_put_native_port_mdev(dev, port_num); + if (ret) + return ret; + } + +done: + return num_counters; +} + +static struct rdma_hw_stats * +mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); + + return mlx5_ib_query_q_counters(dev->mdev, cnts, + counter->stats, counter->id); +} + +static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + + if (!counter->id) + return 0; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); + return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); +} + +static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + int err; + + if (!counter->id) { + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + + MLX5_SET(alloc_q_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); + if (err) + return err; + counter->id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } + + err = mlx5_ib_qp_set_counter(qp, counter); + if (err) + goto fail_set_counter; + + return 0; + +fail_set_counter: + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + + return err; +} + +static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) +{ + return mlx5_ib_qp_set_counter(qp, NULL); +} + + +static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, + const char **names, + size_t *offsets) +{ + int i; + int j = 0; + + for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { + names[j] = basic_q_cnts[i].name; + offsets[j] = basic_q_cnts[i].offset; + } + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { + for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { + names[j] = out_of_seq_q_cnts[i].name; + offsets[j] = out_of_seq_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { + names[j] = retrans_q_cnts[i].name; + offsets[j] = retrans_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + names[j] = roce_accl_cnts[i].name; + offsets[j] = roce_accl_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { + names[j] = cong_cnts[i].name; + offsets[j] = cong_cnts[i].offset; + } + } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } +} + + +static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_counters *cnts) +{ + u32 num_counters; + + num_counters = ARRAY_SIZE(basic_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) + num_counters += ARRAY_SIZE(out_of_seq_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) + num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) + num_counters += ARRAY_SIZE(roce_accl_cnts); + + cnts->num_q_counters = num_counters; + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); + num_counters += ARRAY_SIZE(cong_cnts); + } + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); + } + cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + if (!cnts->names) + return -ENOMEM; + + cnts->offsets = kcalloc(num_counters, + sizeof(cnts->offsets), GFP_KERNEL); + if (!cnts->offsets) + goto err_names; + + return 0; + +err_names: + kfree(cnts->names); + cnts->names = NULL; + return -ENOMEM; +} + +static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + int num_cnt_ports; + int i; + + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + + for (i = 0; i < num_cnt_ports; i++) { + if (dev->port[i].cnts.set_id) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + dev->port[i].cnts.set_id); + mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); + } + kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.offsets); + } +} + +static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + int num_cnt_ports; + int err = 0; + int i; + bool is_shared; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + for (i = 0; i < num_cnt_ports; i++) { + err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); + if (err) + goto err_alloc; + + mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + dev->port[i].cnts.offsets); + + MLX5_SET(alloc_q_counter_in, in, uid, + is_shared ? MLX5_SHARED_RESOURCE_UID : 0); + + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); + if (err) { + mlx5_ib_warn(dev, + "couldn't allocate queue counter for port %d, err %d\n", + i + 1, err); + goto err_alloc; + } + + dev->port[i].cnts.set_id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } + return 0; + +err_alloc: + mlx5_ib_dealloc_counters(dev); + return err; +} + +static int read_flow_counters(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr) +{ + struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_fc_query(dev->mdev, fc, + &read_attr->out[IB_COUNTER_PACKETS], + &read_attr->out[IB_COUNTER_BYTES]); +} + +/* flow counters currently expose two counters packets and bytes */ +#define FLOW_COUNTERS_NUM 2 +static int counters_set_description( + struct ib_counters *counters, enum mlx5_ib_counters_type counters_type, + struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + u32 cntrs_max_index = 0; + int i; + + if (counters_type != MLX5_IB_COUNTERS_FLOW) + return -EINVAL; + + /* init the fields for the object */ + mcounters->type = counters_type; + mcounters->read_counters = read_flow_counters; + mcounters->counters_num = FLOW_COUNTERS_NUM; + mcounters->ncounters = ncounters; + /* each counter entry have both description and index pair */ + for (i = 0; i < ncounters; i++) { + if (desc_data[i].description > IB_COUNTER_BYTES) + return -EINVAL; + + if (cntrs_max_index <= desc_data[i].index) + cntrs_max_index = desc_data[i].index + 1; + } + + mutex_lock(&mcounters->mcntrs_mutex); + mcounters->counters_data = desc_data; + mcounters->cntrs_max_index = cntrs_max_index; + mutex_unlock(&mcounters->mcntrs_mutex); + + return 0; +} + +#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) +int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; + struct mlx5_ib_flow_counters_desc *desc_data = NULL; + bool hw_hndl = false; + int ret = 0; + + if (ucmd && ucmd->ncounters_data != 0) { + cntrs_data = ucmd->data; + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) + return -EINVAL; + + desc_data = kcalloc(cntrs_data->ncounters, + sizeof(*desc_data), + GFP_KERNEL); + if (!desc_data) + return -ENOMEM; + + if (copy_from_user(desc_data, + u64_to_user_ptr(cntrs_data->counters_data), + sizeof(*desc_data) * cntrs_data->ncounters)) { + ret = -EFAULT; + goto free; + } + } + + if (!mcounters->hw_cntrs_hndl) { + mcounters->hw_cntrs_hndl = mlx5_fc_create( + to_mdev(ibcounters->device)->mdev, false); + if (IS_ERR(mcounters->hw_cntrs_hndl)) { + ret = PTR_ERR(mcounters->hw_cntrs_hndl); + goto free; + } + hw_hndl = true; + } + + if (desc_data) { + /* counters already bound to at least one flow */ + if (mcounters->cntrs_max_index) { + ret = -EINVAL; + goto free_hndl; + } + + ret = counters_set_description(ibcounters, + MLX5_IB_COUNTERS_FLOW, + desc_data, + cntrs_data->ncounters); + if (ret) + goto free_hndl; + + } else if (!mcounters->cntrs_max_index) { + /* counters not bound yet, must have udata passed */ + ret = -EINVAL; + goto free_hndl; + } + + return 0; + +free_hndl: + if (hw_hndl) { + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, + mcounters->hw_cntrs_hndl); + mcounters->hw_cntrs_hndl = NULL; + } +free: + kfree(desc_data); + return ret; +} + +void mlx5_ib_counters_clear_description(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters; + + if (!counters || atomic_read(&counters->usecnt) != 1) + return; + + mcounters = to_mcounters(counters); + + mutex_lock(&mcounters->mcntrs_mutex); + kfree(mcounters->counters_data); + mcounters->counters_data = NULL; + mcounters->cntrs_max_index = 0; + mutex_unlock(&mcounters->mcntrs_mutex); +} + +static const struct ib_device_ops hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, + .counter_bind_qp = mlx5_ib_counter_bind_qp, + .counter_unbind_qp = mlx5_ib_counter_unbind_qp, + .counter_dealloc = mlx5_ib_counter_dealloc, + .counter_alloc_stats = mlx5_ib_counter_alloc_stats, + .counter_update_stats = mlx5_ib_counter_update_stats, +}; + +static const struct ib_device_ops counters_ops = { + .create_counters = mlx5_ib_create_counters, + .destroy_counters = mlx5_ib_destroy_counters, + .read_counters = mlx5_ib_read_counters, + + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), +}; + +int mlx5_ib_counters_init(struct mlx5_ib_dev *dev) +{ + ib_set_device_ops(&dev->ib_dev, &counters_ops); + + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + return 0; + + ib_set_device_ops(&dev->ib_dev, &hw_stats_ops); + return mlx5_ib_alloc_counters(dev); +} + +void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + return; + + mlx5_ib_dealloc_counters(dev); +} diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h new file mode 100644 index 000000000000..1aa30c2f3f4d --- /dev/null +++ b/drivers/infiniband/hw/mlx5/counters.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_COUNTERS_H +#define _MLX5_IB_COUNTERS_H + +#include "mlx5_ib.h" + +int mlx5_ib_counters_init(struct mlx5_ib_dev *dev); +void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev); +void mlx5_ib_counters_clear_description(struct ib_counters *counters); +int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd); +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); +#endif /* _MLX5_IB_COUNTERS_H */ diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 655ea9c984e1..9e3d8b826498 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -14,6 +14,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" +#include "devx.h" #include "qp.h" #include <linux/xarray.h> @@ -89,22 +90,6 @@ struct devx_async_event_file { u8 is_destroyed:1; }; -#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) -struct devx_obj { - struct mlx5_ib_dev *ib_dev; - u64 obj_id; - u32 dinlen; /* destroy inbox length */ - u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; - u32 flags; - union { - struct mlx5_ib_devx_mr devx_mr; - struct mlx5_core_dct core_dct; - struct mlx5_core_cq core_cq; - u32 flow_counter_bulk_size; - }; - struct list_head event_sub; /* holds devx_event_subscription entries */ -}; - struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; @@ -171,48 +156,6 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) -{ - struct devx_obj *devx_obj = obj; - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); - - switch (opcode) { - case MLX5_CMD_OP_DESTROY_TIR: - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, - obj_id); - return true; - - case MLX5_CMD_OP_DESTROY_FLOW_TABLE: - *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, - table_id); - return true; - default: - return false; - } -} - -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) -{ - struct devx_obj *devx_obj = obj; - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); - - if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { - - if (offset && offset >= devx_obj->flow_counter_bulk_size) - return false; - - *counter_id = MLX5_GET(dealloc_flow_counter_in, - devx_obj->dinbox, - flow_counter_id); - *counter_id += offset; - return true; - } - - return false; -} - static bool is_legacy_unaffiliated_event_num(u16 event_num) { switch (event_num) { @@ -2419,17 +2362,24 @@ static int devx_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } -void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) { struct mlx5_devx_event_table *table = &dev->devx_event_table; + int uid; - xa_init(&table->event_xa); - mutex_init(&table->event_xa_lock); - MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); - mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); + uid = mlx5_ib_devx_create(dev, false); + if (uid > 0) { + dev->devx_whitelist_uid = uid; + xa_init(&table->event_xa); + mutex_init(&table->event_xa_lock); + MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); + } + + return 0; } -void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) +void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_devx_event_table *table = &dev->devx_event_table; struct devx_event_subscription *sub, *tmp; @@ -2437,17 +2387,21 @@ void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) void *entry; unsigned long id; - mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); - mutex_lock(&dev->devx_event_table.event_xa_lock); - xa_for_each(&table->event_xa, id, entry) { - event = entry; - list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list, - xa_list) - devx_cleanup_subscription(dev, sub); - kfree(entry); + if (dev->devx_whitelist_uid) { + mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); + mutex_lock(&dev->devx_event_table.event_xa_lock); + xa_for_each(&table->event_xa, id, entry) { + event = entry; + list_for_each_entry_safe( + sub, tmp, &event->unaffiliated_list, xa_list) + devx_cleanup_subscription(dev, sub); + kfree(entry); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + xa_destroy(&table->event_xa); + + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); } - mutex_unlock(&dev->devx_event_table.event_xa_lock); - xa_destroy(&table->event_xa); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h new file mode 100644 index 000000000000..1f69866aed16 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_DEVX_H +#define _MLX5_IB_DEVX_H + +#include "mlx5_ib.h" + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_ib_dev *ib_dev; + u64 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + union { + struct mlx5_ib_devx_mr devx_mr; + struct mlx5_core_dct core_dct; + struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; + }; + struct list_head event_sub; /* holds devx_event_subscription entries */ +}; +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); +int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); +void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); +#else +static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +{ + return -EOPNOTSUPP; +} +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} +static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) +{ + return 0; +} +static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) +{ +} +#endif +#endif /* _MLX5_IB_DEVX_H */ diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c deleted file mode 100644 index 216a1108ad34..000000000000 --- a/drivers/infiniband/hw/mlx5/flow.c +++ /dev/null @@ -1,765 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* - * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. - */ - -#include <rdma/ib_user_verbs.h> -#include <rdma/ib_verbs.h> -#include <rdma/uverbs_types.h> -#include <rdma/uverbs_ioctl.h> -#include <rdma/uverbs_std_types.h> -#include <rdma/mlx5_user_ioctl_cmds.h> -#include <rdma/mlx5_user_ioctl_verbs.h> -#include <rdma/ib_umem.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/fs.h> -#include "mlx5_ib.h" - -#define UVERBS_MODULE_NAME mlx5_ib -#include <rdma/uverbs_named_ioctl.h> - -static int -mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, - enum mlx5_flow_namespace_type *namespace) -{ - switch (table_type) { - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: - *namespace = MLX5_FLOW_NAMESPACE_BYPASS; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: - *namespace = MLX5_FLOW_NAMESPACE_EGRESS; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: - *namespace = MLX5_FLOW_NAMESPACE_FDB; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: - *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: - *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; - break; - default: - return -EINVAL; - } - - return 0; -} - -static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { - [MLX5_IB_FLOW_TYPE_NORMAL] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - .u.ptr = { - .len = sizeof(u16), /* data is priority */ - .min_len = sizeof(u16), - } - }, - [MLX5_IB_FLOW_TYPE_SNIFFER] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, -}; - -static int get_dests(struct uverbs_attr_bundle *attrs, - struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, - int *dest_type, struct ib_qp **qp, u32 *flags) -{ - bool dest_devx, dest_qp; - void *devx_obj; - int err; - - dest_devx = uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - dest_qp = uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - - *flags = 0; - err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); - if (err) - return err; - - /* Both flags are not allowed */ - if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && - *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) - return -EINVAL; - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - if (dest_devx && (dest_qp || *flags)) - return -EINVAL; - else if (dest_qp && *flags) - return -EINVAL; - } - - /* Allow only DEVX object, drop as dest for FDB */ - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || - (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) - return -EINVAL; - - /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) - return -EINVAL; - - *qp = NULL; - if (dest_devx) { - devx_obj = - uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - - /* Verify that the given DEVX object is a flow - * steering destination. - */ - if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type)) - return -EINVAL; - /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) - return -EINVAL; - } else if (dest_qp) { - struct mlx5_ib_qp *mqp; - - *qp = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if (IS_ERR(*qp)) - return PTR_ERR(*qp); - - if ((*qp)->qp_type != IB_QPT_RAW_PACKET) - return -EINVAL; - - mqp = to_mqp(*qp); - if (mqp->is_rss) - *dest_id = mqp->rss_qp.tirn; - else - *dest_id = mqp->raw_packet_qp.rq.tirn; - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } - - if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - return -EINVAL; - - return 0; -} - -#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 -static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_flow_context flow_context = {.flow_tag = - MLX5_FS_DEFAULT_FLOW_TAG}; - u32 *offset_attr, offset = 0, counter_id = 0; - int dest_id, dest_type, inlen, len, ret, i; - struct mlx5_ib_flow_handler *flow_handler; - struct mlx5_ib_flow_matcher *fs_matcher; - struct ib_uobject **arr_flow_actions; - struct ib_uflow_resources *uflow_res; - struct mlx5_flow_act flow_act = {}; - struct ib_qp *qp = NULL; - void *devx_obj, *cmd_in; - struct ib_uobject *uobj; - struct mlx5_ib_dev *dev; - u32 flags; - - if (!capable(CAP_NET_RAW)) - return -EPERM; - - fs_matcher = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); - uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - dev = mlx5_udata_to_mdev(&attrs->driver_udata); - - if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) - return -EINVAL; - - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; - - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - - len = uverbs_attr_get_uobjs_arr(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); - if (len) { - devx_obj = arr_flow_actions[0]->object; - - if (uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { - - int num_offsets = uverbs_attr_ptr_get_array_size( - attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, - sizeof(u32)); - - if (num_offsets != 1) - return -EINVAL; - - offset_attr = uverbs_attr_get_alloced_ptr( - attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); - offset = *offset_attr; - } - - if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, - &counter_id)) - return -EINVAL; - - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - } - - cmd_in = uverbs_attr_get_alloced_ptr( - attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - inlen = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - - uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); - if (!uflow_res) - return -ENOMEM; - - len = uverbs_attr_get_uobjs_arr(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); - for (i = 0; i < len; i++) { - struct mlx5_ib_flow_action *maction = - to_mflow_act(arr_flow_actions[i]->object); - - ret = parse_flow_flow_action(maction, false, &flow_act); - if (ret) - goto err_out; - flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, - arr_flow_actions[i]->object); - } - - ret = uverbs_copy_from(&flow_context.flow_tag, attrs, - MLX5_IB_ATTR_CREATE_FLOW_TAG); - if (!ret) { - if (flow_context.flow_tag >= BIT(24)) { - ret = -EINVAL; - goto err_out; - } - flow_context.flags |= FLOW_CONTEXT_HAS_TAG; - } - - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, - &flow_context, - &flow_act, - counter_id, - cmd_in, inlen, - dest_id, dest_type); - if (IS_ERR(flow_handler)) { - ret = PTR_ERR(flow_handler); - goto err_out; - } - - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); - - return 0; -err_out: - ib_uverbs_flow_resources_free(uflow_res); - return ret; -} - -static int flow_matcher_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; - - kfree(obj); - return 0; -} - -static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, - struct mlx5_ib_flow_matcher *obj) -{ - enum mlx5_ib_uapi_flow_table_type ft_type = - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; - u32 flags; - int err; - - /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older - * users should switch to it. We leave this to not break userspace - */ - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && - uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) - return -EINVAL; - - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { - err = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); - if (err) - return err; - - err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); - if (err) - return err; - - return 0; - } - - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { - err = uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - IB_FLOW_ATTR_FLAGS_EGRESS); - if (err) - return err; - - if (flags) { - mlx5_ib_ft_type_to_namespace( - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, - &obj->ns_type); - return 0; - } - } - - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; - - return 0; -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); - struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); - struct mlx5_ib_flow_matcher *obj; - int err; - - obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); - if (!obj) - return -ENOMEM; - - obj->mask_len = uverbs_attr_get_len( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); - err = uverbs_copy_from(&obj->matcher_mask, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); - if (err) - goto end; - - obj->flow_type = uverbs_attr_get_enum_id( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); - - if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { - err = uverbs_copy_from(&obj->priority, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); - if (err) - goto end; - } - - err = uverbs_copy_from(&obj->match_criteria_enable, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); - if (err) - goto end; - - err = mlx5_ib_matcher_ns(attrs, obj); - if (err) - goto end; - - uobj->object = obj; - obj->mdev = dev->mdev; - atomic_set(&obj->usecnt, 0); - return 0; - -end: - kfree(obj); - return err; -} - -void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) -{ - switch (maction->flow_action_raw.sub_type) { - case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: - mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.modify_hdr); - break; - case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: - mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.pkt_reformat); - break; - case MLX5_IB_FLOW_ACTION_DECAP: - break; - default: - break; - } -} - -static struct ib_flow_action * -mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, - enum mlx5_ib_uapi_flow_table_type ft_type, - u8 num_actions, void *in) -{ - enum mlx5_flow_namespace_type namespace; - struct mlx5_ib_flow_action *maction; - int ret; - - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); - if (ret) - return ERR_PTR(-EINVAL); - - maction = kzalloc(sizeof(*maction), GFP_KERNEL); - if (!maction) - return ERR_PTR(-ENOMEM); - - maction->flow_action_raw.modify_hdr = - mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); - - if (IS_ERR(maction->flow_action_raw.modify_hdr)) { - ret = PTR_ERR(maction->flow_action_raw.modify_hdr); - kfree(maction); - return ERR_PTR(ret); - } - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_MODIFY_HEADER; - maction->flow_action_raw.dev = dev; - - return &maction->ib_action; -} - -static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) -{ - return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - max_modify_header_actions); -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); - enum mlx5_ib_uapi_flow_table_type ft_type; - struct ib_flow_action *action; - int num_actions; - void *in; - int ret; - - if (!mlx5_ib_modify_header_supported(mdev)) - return -EOPNOTSUPP; - - in = uverbs_attr_get_alloced_ptr(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - - num_actions = uverbs_attr_ptr_get_array_size( - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, - MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); - if (num_actions < 0) - return num_actions; - - ret = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); - if (ret) - return ret; - action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); - if (IS_ERR(action)) - return PTR_ERR(action); - - uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, - IB_FLOW_ACTION_UNSPECIFIED); - - return 0; -} - -static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, - u8 packet_reformat_type, - u8 ft_type) -{ - switch (packet_reformat_type) { - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) - return MLX5_CAP_FLOWTABLE(ibdev->mdev, - encap_general_header); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) - return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, - reformat_l2_to_l3_tunnel); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, - reformat_l3_tunnel_to_l2); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); - break; - default: - break; - } - - return false; -} - -static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) -{ - switch (dv_prt) { - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: - *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int mlx5_ib_flow_action_create_packet_reformat_ctx( - struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_action *maction, - u8 ft_type, u8 dv_prt, - void *in, size_t len) -{ - enum mlx5_flow_namespace_type namespace; - u8 prm_prt; - int ret; - - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); - if (ret) - return ret; - - ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); - if (ret) - return ret; - - maction->flow_action_raw.pkt_reformat = - mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace); - if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { - ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); - return ret; - } - - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; - maction->flow_action_raw.dev = dev; - - return 0; -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); - enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; - enum mlx5_ib_uapi_flow_table_type ft_type; - struct mlx5_ib_flow_action *maction; - int ret; - - ret = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); - if (ret) - return ret; - - ret = uverbs_get_const(&dv_prt, attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); - if (ret) - return ret; - - if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) - return -EOPNOTSUPP; - - maction = kzalloc(sizeof(*maction), GFP_KERNEL); - if (!maction) - return -ENOMEM; - - if (dv_prt == - MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_DECAP; - maction->flow_action_raw.dev = mdev; - } else { - void *in; - int len; - - in = uverbs_attr_get_alloced_ptr(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); - if (IS_ERR(in)) { - ret = PTR_ERR(in); - goto free_maction; - } - - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); - - ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, - maction, ft_type, dv_prt, in, len); - if (ret) - goto free_maction; - } - - uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, - IB_FLOW_ACTION_UNSPECIFIED); - return 0; - -free_maction: - kfree(maction); - return ret; -} - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_CREATE_FLOW, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, - UVERBS_OBJECT_FLOW, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN( - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), - UA_MANDATORY, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_READ, - UA_MANDATORY), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, - UVERBS_OBJECT_QP, - UVERBS_ACCESS_READ), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ), - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_READ, 1, - MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, - UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL), - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, - UVERBS_ATTR_MIN_SIZE(sizeof(u32)), - UA_OPTIONAL, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - enum mlx5_ib_create_flow_flags, - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY( - MLX5_IB_METHOD_DESTROY_FLOW, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, - UVERBS_OBJECT_FLOW, - UVERBS_ACCESS_DESTROY, - UA_MANDATORY)); - -ADD_UVERBS_METHODS(mlx5_ib_fs, - UVERBS_OBJECT_FLOW, - &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), - &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, - UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( - set_add_copy_action_in_auto)), - UA_MANDATORY, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_MANDATORY)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, - UVERBS_ATTR_MIN_SIZE(1), - UA_ALLOC_AND_COPY, - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, - enum mlx5_ib_uapi_flow_action_packet_reformat_type, - UA_MANDATORY), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_MANDATORY)); - -ADD_UVERBS_METHODS( - mlx5_ib_flow_actions, - UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_MATCHER_CREATE, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN( - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), - UA_MANDATORY), - UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, - mlx5_ib_flow_type, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, - UVERBS_ATTR_TYPE(u8), - UA_MANDATORY), - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - enum ib_flow_flags, - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY( - MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_DESTROY, - UA_MANDATORY)); - -DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); - -const struct uapi_definition mlx5_ib_flow_defs[] = { - UAPI_DEF_CHAIN_OBJ_TREE_NAMED( - MLX5_IB_OBJECT_FLOW_MATCHER), - UAPI_DEF_CHAIN_OBJ_TREE( - UVERBS_OBJECT_FLOW, - &mlx5_ib_fs), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_actions), - {}, -}; diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c new file mode 100644 index 000000000000..e9cfb9a2ef41 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -0,0 +1,2516 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/uverbs_std_types.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/accel.h> +#include <linux/mlx5/eswitch.h> +#include "mlx5_ib.h" +#include "counters.h" +#include "devx.h" +#include "fs.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +enum { + MATCH_CRITERIA_ENABLE_OUTER_BIT, + MATCH_CRITERIA_ENABLE_MISC_BIT, + MATCH_CRITERIA_ENABLE_INNER_BIT, + MATCH_CRITERIA_ENABLE_MISC2_BIT +}; + +#define HEADER_IS_ZERO(match_criteria, headers) \ + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ + +static u8 get_match_criteria_enable(u32 *match_criteria) +{ + u8 match_criteria_enable; + + match_criteria_enable = + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << + MATCH_CRITERIA_ENABLE_OUTER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << + MATCH_CRITERIA_ENABLE_MISC_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << + MATCH_CRITERIA_ENABLE_INNER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << + MATCH_CRITERIA_ENABLE_MISC2_BIT; + + return match_criteria_enable; +} + +static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + u8 entry_mask; + u8 entry_val; + int err = 0; + + if (!mask) + goto out; + + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, + ip_protocol); + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, + ip_protocol); + if (!entry_mask) { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + goto out; + } + /* Don't override existing ip protocol */ + if (mask != entry_mask || val != entry_val) + err = -EINVAL; +out: + return err; +} + +static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, + bool inner) +{ + if (inner) { + MLX5_SET(fte_match_set_misc, + misc_c, inner_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, inner_ipv6_flow_label, val); + } else { + MLX5_SET(fte_match_set_misc, + misc_c, outer_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, outer_ipv6_flow_label, val); + } +} + +static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); +} + +static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) +{ + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) + return -EOPNOTSUPP; + + return 0; +} + +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD tos +#define LAST_IPV6_FIELD traffic_class +#define LAST_TCP_UDP_FIELD src_port +#define LAST_TUNNEL_FIELD tunnel_id +#define LAST_FLOW_TAG_FIELD tag_id +#define LAST_DROP_FIELD size +#define LAST_COUNTERS_FIELD counters + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action) +{ + + switch (maction->ib_action.type) { + case IB_FLOW_ACTION_ESP: + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) + return -EINVAL; + /* Currently only AES_GCM keymat is supported by the driver */ + action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; + action->action |= is_egress ? + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : + MLX5_FLOW_CONTEXT_ACTION_DECRYPT; + return 0; + case IB_FLOW_ACTION_UNSPECIFIED: + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + action->modify_hdr = + maction->flow_action_raw.modify_hdr; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_DECAP) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { + if (action->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + return -EINVAL; + action->action |= + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + action->pkt_reformat = + maction->flow_action_raw.pkt_reformat; + return 0; + } + fallthrough; + default: + return -EOPNOTSUPP; + } +} + +static int parse_flow_attr(struct mlx5_core_dev *mdev, + struct mlx5_flow_spec *spec, + const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_act *action, u32 prev_type) +{ + struct mlx5_flow_context *flow_context = &spec->flow_context; + u32 *match_c = spec->match_criteria; + u32 *match_v = spec->match_value; + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters); + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters_2); + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters_2); + void *headers_c; + void *headers_v; + int match_ipv; + int ret; + + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + inner_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + } else { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + } + + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { + case IB_FLOW_SPEC_ETH: + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -EOPNOTSUPP; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + cvlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -EOPNOTSUPP; + + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, MLX5_FS_IPV4_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IP); + } + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + + set_tos(headers_c, headers_v, + ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); + + if (set_proto(headers_c, headers_v, + ib_spec->ipv4.mask.proto, + ib_spec->ipv4.val.proto)) + return -EINVAL; + break; + case IB_FLOW_SPEC_IPV6: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) + return -EOPNOTSUPP; + + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, MLX5_FS_IPV6_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IPV6); + } + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.src_ip, + sizeof(ib_spec->ipv6.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.src_ip, + sizeof(ib_spec->ipv6.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.dst_ip, + sizeof(ib_spec->ipv6.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.dst_ip, + sizeof(ib_spec->ipv6.val.dst_ip)); + + set_tos(headers_c, headers_v, + ib_spec->ipv6.mask.traffic_class, + ib_spec->ipv6.val.traffic_class); + + if (set_proto(headers_c, headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr)) + return -EINVAL; + + set_flow_label(misc_params_c, misc_params_v, + ntohl(ib_spec->ipv6.mask.flow_label), + ntohl(ib_spec->ipv6.val.flow_label), + ib_spec->type & IB_FLOW_SPEC_INNER); + break; + case IB_FLOW_SPEC_ESP: + if (ib_spec->esp.mask.seq) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, + ntohl(ib_spec->esp.mask.spi)); + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + ntohl(ib_spec->esp.val.spi)); + break; + case IB_FLOW_SPEC_TCP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_GRE: + if (ib_spec->gre.mask.c_ks_res0_ver) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_GRE); + + MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, + ntohs(ib_spec->gre.mask.protocol)); + MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, + ntohs(ib_spec->gre.val.protocol)); + + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + gre_key.nvgre.hi), + &ib_spec->gre.mask.key, + sizeof(ib_spec->gre.mask.key)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, + gre_key.nvgre.hi), + &ib_spec->gre.val.key, + sizeof(ib_spec->gre.val.key)); + break; + case IB_FLOW_SPEC_MPLS: + switch (prev_type) { + case IB_FLOW_SPEC_UDP: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_udp), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + case IB_FLOW_SPEC_GRE: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_gre), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + default: + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + inner_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + inner_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } else { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } + } + break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, + LAST_TUNNEL_FIELD)) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, + ntohl(ib_spec->tunnel.mask.tunnel_id)); + MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, + ntohl(ib_spec->tunnel.val.tunnel_id)); + break; + case IB_FLOW_SPEC_ACTION_TAG: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, + LAST_FLOW_TAG_FIELD)) + return -EOPNOTSUPP; + if (ib_spec->flow_tag.tag_id >= BIT(24)) + return -EINVAL; + + flow_context->flow_tag = ib_spec->flow_tag.tag_id; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + break; + case IB_FLOW_SPEC_ACTION_DROP: + if (FIELDS_NOT_SUPPORTED(ib_spec->drop, + LAST_DROP_FIELD)) + return -EOPNOTSUPP; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + break; + case IB_FLOW_SPEC_ACTION_HANDLE: + ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), + flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); + if (ret) + return ret; + break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, + LAST_COUNTERS_FIELD)) + return -EOPNOTSUPP; + + /* for now support only one counters spec per flow */ + if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + return -EINVAL; + + action->counters = ib_spec->flow_count.counters; + action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. + */ +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) +{ + union ib_flow_spec *flow_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->num_of_specs < 1) + return false; + + flow_spec = (union ib_flow_spec *)(ib_attr + 1); + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { + struct ib_flow_spec_ipv4 *ipv4_spec; + + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) + return true; + + return false; + } + + if (flow_spec->type == IB_FLOW_SPEC_ETH) { + struct ib_flow_spec_eth *eth_spec; + + eth_spec = (struct ib_flow_spec_eth *)flow_spec; + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); + } + + return false; +} + +enum valid_spec { + VALID_SPEC_INVALID, + VALID_SPEC_VALID, + VALID_SPEC_NA, +}; + +static enum valid_spec +is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + const u32 *match_c = spec->match_criteria; + bool is_crypto = + (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); + bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); + bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* + * Currently only crypto is supported in egress, when regular egress + * rules would be supported, always return VALID_SPEC_NA. + */ + if (!is_crypto) + return VALID_SPEC_NA; + + return is_crypto && is_ipsec && + (!egress || (!is_drop && + !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? + VALID_SPEC_VALID : VALID_SPEC_INVALID; +} + +static bool is_valid_spec(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + /* We curretly only support ipsec egress flow */ + return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; +} + +static bool is_valid_ethertype(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr, + bool check_inner) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + int match_ipv = check_inner ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; + bool ipv4_spec_valid, ipv6_spec_valid; + unsigned int ip_spec_type = 0; + bool has_ethertype = false; + unsigned int spec_index; + bool mask_valid = true; + u16 eth_type = 0; + bool type_valid; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && + ib_spec->eth.mask.ether_type) { + mask_valid = (ib_spec->eth.mask.ether_type == + htons(0xffff)); + has_ethertype = true; + eth_type = ntohs(ib_spec->eth.val.ether_type); + } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || + (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { + ip_spec_type = ib_spec->type; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + + type_valid = (!has_ethertype) || (!ip_spec_type); + if (!type_valid && mask_valid) { + ipv4_spec_valid = (eth_type == ETH_P_IP) && + (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); + ipv6_spec_valid = (eth_type == ETH_P_IPV6) && + (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); + + type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || + (((eth_type == ETH_P_MPLS_UC) || + (eth_type == ETH_P_MPLS_MC)) && match_ipv); + } + + return type_valid; +} + +static bool is_valid_attr(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr) +{ + return is_valid_ethertype(mdev, flow_attr, false) && + is_valid_ethertype(mdev, flow_attr, true); +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + struct mlx5_ib_dev *dev = handler->dev; + + mutex_lock(&dev->flow_db->lock); + + list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rules(iter->rule); + put_flow_table(dev, iter->prio, true); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rules(handler->rule); + put_flow_table(dev, handler->prio, true); + mlx5_ib_counters_clear_description(handler->ibcounters); + mutex_unlock(&dev->flow_db->lock); + if (handler->flow_matcher) + atomic_dec(&handler->flow_matcher->usecnt); + kfree(handler); + + return 0; +} + +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + +enum flow_table_type { + MLX5_IB_FT_RX, + MLX5_IB_FT_TX +}; + +#define MLX5_FS_MAX_TYPES 6 +#define MLX5_FS_MAX_ENTRIES BIT(16) + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, + struct mlx5_ib_flow_prio *prio, + int priority, + int num_entries, int num_groups, + u32 flags) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + ft_attr.prio = priority; + ft_attr.max_fte = num_entries; + ft_attr.flags = flags; + ft_attr.autogroup.max_num_groups = num_groups; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return ERR_CAST(ft); + + prio->flow_table = ft; + prio->refcount = 0; + return prio; +} + +static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr, + enum flow_table_type ft_type) +{ + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int max_table_size; + int num_entries; + int num_groups; + bool esw_encap; + u32 flags = 0; + int priority; + + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + enum mlx5_flow_namespace_type fn_type; + + if (flow_is_multicast_only(flow_attr) && + !dont_trap) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); + if (ft_type == MLX5_IB_FT_RX) { + fn_type = MLX5_FLOW_NAMESPACE_BYPASS; + prio = &dev->flow_db->prios[priority]; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + fn_type = MLX5_FLOW_NAMESPACE_EGRESS; + prio = &dev->flow_db->egress_prios[priority]; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + allow_sniffer_and_nic_rx_shared_tir)) + return ERR_PTR(-EOPNOTSUPP); + + ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); + + prio = &dev->flow_db->sniffer[ft_type]; + priority = 0; + num_entries = 1; + num_groups = 1; + } + + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + max_table_size = min_t(int, num_entries, max_table_size); + + ft = prio->flow_table; + if (!ft) + return _get_prio(ns, prio, priority, max_table_size, num_groups, + flags); + + return prio; +} + +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, + rep->vport)); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_destination dest_arr[2] = {}; + struct mlx5_flow_destination *rule_dst = dest_arr; + const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); + unsigned int spec_index; + u32 prev_type = 0; + int err = 0; + int dest_num = 0; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + + if (!is_valid_attr(dev->mdev, flow_attr)) + return ERR_PTR(-EINVAL); + + if (dev->is_rep && is_egress) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(dev->mdev, spec, + ib_flow, flow_attr, &flow_act, + prev_type); + if (err < 0) + goto free; + + prev_type = ((union ib_flow_spec *)ib_flow)->type; + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } + + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + + if (dev->is_rep) { + struct mlx5_eswitch_rep *rep; + + rep = dev->port[flow_attr->port - 1].rep; + if (!rep) { + err = -EINVAL; + goto free; + } + + mlx5_ib_set_rule_source_port(dev, spec, rep); + } + + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); + + if (is_egress && + !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + err = -EINVAL; + goto free; + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + struct mlx5_ib_mcounters *mcounters; + + err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd); + if (err) + goto free; + + mcounters = to_mcounters(flow_act.counters); + handler->ibcounters = flow_act.counters; + dest_arr[dest_num].type = + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest_arr[dest_num].counter_id = + mlx5_fc_id(mcounters->hw_cntrs_hndl); + dest_num++; + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + if (!dest_num) + rule_dst = NULL; + } else { + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + flow_act.action |= + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + if (is_egress) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + else if (dest_num) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", + spec->flow_context.flow_tag, flow_attr->type); + err = -EINVAL; + goto free; + } + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, + rule_dst, dest_num); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + + ft_prio->flow_table = ft; +free: + if (err && handler) { + mlx5_ib_counters_clear_description(handler->ibcounters); + kfree(handler); + } + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + mlx5_del_flow_rules(handler->rule); + ft_prio->refcount--; + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_rx, + struct mlx5_ib_flow_prio *ft_tx, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_rx; + struct mlx5_ib_flow_handler *handler_tx; + int err; + static const struct ib_flow_attr flow_attr = { + .num_of_specs = 0, + .size = sizeof(flow_attr) + }; + + handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); + if (IS_ERR(handler_rx)) { + err = PTR_ERR(handler_rx); + goto err; + } + + handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); + if (IS_ERR(handler_tx)) { + err = PTR_ERR(handler_tx); + goto err_tx; + } + + list_add(&handler_tx->list, &handler_rx->list); + + return handler_rx; + +err_tx: + mlx5_del_flow_rules(handler_rx->rule); + ft_rx->refcount--; + kfree(handler_rx); +err: + return ERR_PTR(err); +} + + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio_tx = NULL; + struct mlx5_ib_flow_prio *ft_prio; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; + size_t min_ucmd_sz, required_ucmd_sz; + int err; + int underlay_qpn; + + if (udata && udata->inlen) { + min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + + sizeof(ucmd_hdr.reserved); + if (udata->inlen < min_ucmd_sz) + return ERR_PTR(-EOPNOTSUPP); + + err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); + if (err) + return ERR_PTR(err); + + /* currently supports only one counters data */ + if (ucmd_hdr.ncounters_data > 1) + return ERR_PTR(-EINVAL); + + required_ucmd_sz = min_ucmd_sz + + sizeof(struct mlx5_ib_flow_counters_data) * + ucmd_hdr.ncounters_data; + if (udata->inlen > required_ucmd_sz && + !ib_is_udata_cleared(udata, required_ucmd_sz, + udata->inlen - required_ucmd_sz)) + return ERR_PTR(-EOPNOTSUPP); + + ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); + if (err) + goto free_ucmd; + } + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { + err = -ENOMEM; + goto free_ucmd; + } + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > dev->num_ports || + (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | + IB_FLOW_ATTR_FLAGS_EGRESS))) { + err = -EINVAL; + goto free_ucmd; + } + + if (is_egress && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + err = -EINVAL; + goto free_ucmd; + } + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free_ucmd; + } + + mutex_lock(&dev->flow_db->lock); + + ft_prio = get_flow_table(dev, flow_attr, + is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); + if (IS_ERR(ft_prio_tx)) { + err = PTR_ERR(ft_prio_tx); + ft_prio_tx = NULL; + goto destroy_ft; + } + } + + if (is_egress) { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + if (mqp->is_rss) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; + } + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? + mqp->underlay_qpn : + 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, + underlay_qpn, ucmd); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + kfree(ucmd); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); + if (ft_prio_tx) + put_flow_table(dev, ft_prio_tx, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); +free_ucmd: + kfree(ucmd); + return ERR_PTR(err); +} + +static struct mlx5_ib_flow_prio * +_get_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + bool mcast) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio = NULL; + int max_table_size = 0; + bool esw_encap; + u32 flags = 0; + int priority; + + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(fs_matcher->priority, false); + + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2) && + !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + max_table_size = BIT( + MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + priority = FDB_BYPASS_PATH; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; + } + + max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); + + ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + prio = &dev->flow_db->prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + prio = &dev->flow_db->egress_prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + prio = &dev->flow_db->fdb; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + prio = &dev->flow_db->rdma_rx[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + prio = &dev->flow_db->rdma_tx[priority]; + + if (!prio) + return ERR_PTR(-EINVAL); + + if (prio->flow_table) + return prio; + + return _get_prio(ns, prio, priority, max_table_size, + MLX5_FS_MAX_TYPES, flags); +} + +static struct mlx5_ib_flow_handler * +_create_raw_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct mlx5_flow_destination *dst, + struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, + struct mlx5_flow_act *flow_act, + void *cmd_in, int inlen, + int dst_num) +{ + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft = ft_prio->flow_table; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + memcpy(spec->match_value, cmd_in, inlen); + memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, + fs_matcher->mask_len); + spec->match_criteria_enable = fs_matcher->match_criteria_enable; + spec->flow_context = *flow_context; + + handler->rule = mlx5_add_flow_rules(ft, spec, + flow_act, dst, dst_num); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + ft_prio->flow_table = ft; + +free: + if (err) + kfree(handler); + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, + void *match_v) +{ + void *match_c; + void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; + void *dmac, *dmac_mask; + void *ipv4, *ipv4_mask; + + if (!(fs_matcher->match_criteria_enable & + (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) + return false; + + match_c = fs_matcher->matcher_mask.match_params; + match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + + dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dmac_47_16); + dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dmac_47_16); + + if (is_multicast_ether_addr(dmac) && + is_multicast_ether_addr(dmac_mask)) + return true; + + ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (ipv4_is_multicast(*(__be32 *)(ipv4)) && + ipv4_is_multicast(*(__be32 *)(ipv4_mask))) + return true; + + return false; +} + +static struct mlx5_ib_flow_handler *raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) +{ + struct mlx5_flow_destination *dst; + struct mlx5_ib_flow_prio *ft_prio; + struct mlx5_ib_flow_handler *handler; + int dst_num = 0; + bool mcast; + int err; + + if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOMEM); + + dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mcast = raw_fs_is_multicast(fs_matcher, cmd_in); + mutex_lock(&dev->flow_db->lock); + + ft_prio = _get_flow_table(dev, fs_matcher, mcast); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + dst[dst_num].type = dest_type; + dst[dst_num++].tir_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num++].ft_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + } + + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, + flow_context, flow_act, + cmd_in, inlen, dst_num); + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + atomic_inc(&fs_matcher->usecnt); + handler->flow_matcher = fs_matcher; + + kfree(dst); + + return handler; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + + return ERR_PTR(err); +} + +static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) +{ + u32 flags = 0; + + if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) + flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; + + return flags; +} + +#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ + MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +static struct ib_flow_action * +mlx5_ib_create_flow_action_esp(struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *mdev = to_mdev(device); + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; + struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; + struct mlx5_ib_flow_action *action; + u64 action_flags; + u64 flags; + int err = 0; + + err = uverbs_get_flags64( + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); + if (err) + return ERR_PTR(err); + + flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); + + /* We current only support a subset of the standard features. Only a + * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn + * (with overlap). Full offload mode isn't supported. + */ + if (!attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) + return ERR_PTR(-EOPNOTSUPP); + + if (attr->keymat->protocol != + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) + return ERR_PTR(-EOPNOTSUPP); + + aes_gcm = &attr->keymat->keymat.aes_gcm; + + if (aes_gcm->icv_len != 16 || + aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return ERR_PTR(-EOPNOTSUPP); + + action = kmalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return ERR_PTR(-ENOMEM); + + action->esp_aes_gcm.ib_flags = attr->flags; + memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, + sizeof(accel_attrs.keymat.aes_gcm.aes_key)); + accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; + memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, + sizeof(accel_attrs.keymat.aes_gcm.salt)); + memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, + sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); + accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; + accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; + accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) + accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; + + action->esp_aes_gcm.ctx = + mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); + if (IS_ERR(action->esp_aes_gcm.ctx)) { + err = PTR_ERR(action->esp_aes_gcm.ctx); + goto err_parse; + } + + action->esp_aes_gcm.ib_flags = attr->flags; + + return &action->ib_action; + +err_parse: + kfree(action); + return ERR_PTR(err); +} + +static int +mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + struct mlx5_accel_esp_xfrm_attrs accel_attrs; + int err = 0; + + if (attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) + return -EOPNOTSUPP; + + /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can + * be modified. + */ + if (!(maction->esp_aes_gcm.ib_flags & + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && + attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) + return -EINVAL; + + memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, + sizeof(accel_attrs)); + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + else + accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, + &accel_attrs); + if (err) + return err; + + maction->esp_aes_gcm.ib_flags &= + ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + maction->esp_aes_gcm.ib_flags |= + attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + + return 0; +} + +static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + switch (maction->flow_action_raw.sub_type) { + case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: + mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.modify_hdr); + break; + case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: + mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.pkt_reformat); + break; + case MLX5_IB_FLOW_ACTION_DECAP: + break; + default: + break; + } +} + +static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + + switch (action->type) { + case IB_FLOW_ACTION_ESP: + /* + * We only support aes_gcm by now, so we implicitly know this is + * the underline crypto. + */ + mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); + break; + case IB_FLOW_ACTION_UNSPECIFIED: + destroy_flow_action_raw(maction); + break; + default: + WARN_ON(true); + break; + } + + kfree(maction); + return 0; +} + +static int +mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, + enum mlx5_flow_namespace_type *namespace) +{ + switch (table_type) { + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: + *namespace = MLX5_FLOW_NAMESPACE_BYPASS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: + *namespace = MLX5_FLOW_NAMESPACE_EGRESS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: + *namespace = MLX5_FLOW_NAMESPACE_FDB; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { + [MLX5_IB_FLOW_TYPE_NORMAL] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + .u.ptr = { + .len = sizeof(u16), /* data is priority */ + .min_len = sizeof(u16), + } + }, + [MLX5_IB_FLOW_TYPE_SNIFFER] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, +}; + +static bool is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + +static int get_dests(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, + int *dest_type, struct ib_qp **qp, u32 *flags) +{ + bool dest_devx, dest_qp; + void *devx_obj; + int err; + + dest_devx = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_qp = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + + *flags = 0; + err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); + if (err) + return err; + + /* Both flags are not allowed */ + if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && + *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + return -EINVAL; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + if (dest_devx && (dest_qp || *flags)) + return -EINVAL; + else if (dest_qp && *flags) + return -EINVAL; + } + + /* Allow only DEVX object, drop as dest for FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || + (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) + return -EINVAL; + + /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) + return -EINVAL; + + *qp = NULL; + if (dest_devx) { + devx_obj = + uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + + /* Verify that the given DEVX object is a flow + * steering destination. + */ + if (!is_flow_dest(devx_obj, dest_id, dest_type)) + return -EINVAL; + /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + return -EINVAL; + } else if (dest_qp) { + struct mlx5_ib_qp *mqp; + + *qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(*qp)) + return PTR_ERR(*qp); + + if ((*qp)->qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + mqp = to_mqp(*qp); + if (mqp->is_rss) + *dest_id = mqp->rss_qp.tirn; + else + *dest_id = mqp->raw_packet_qp.rq.tirn; + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } + + if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)) + return -EINVAL; + + return 0; +} + +static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + *counter_id += offset; + return true; + } + + return false; +} + +#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_flow_context flow_context = {.flow_tag = + MLX5_FS_DEFAULT_FLOW_TAG}; + u32 *offset_attr, offset = 0, counter_id = 0; + int dest_id, dest_type = -1, inlen, len, ret, i; + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher *fs_matcher; + struct ib_uobject **arr_flow_actions; + struct ib_uflow_resources *uflow_res; + struct mlx5_flow_act flow_act = {}; + struct ib_qp *qp = NULL; + void *devx_obj, *cmd_in; + struct ib_uobject *uobj; + struct mlx5_ib_dev *dev; + u32 flags; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + dev = mlx5_udata_to_mdev(&attrs->driver_udata); + + if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) + return -EINVAL; + + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!is_flow_counter(devx_obj, offset, &counter_id)) + return -EINVAL; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + + uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); + if (!uflow_res) + return -ENOMEM; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); + for (i = 0; i < len; i++) { + struct mlx5_ib_flow_action *maction = + to_mflow_act(arr_flow_actions[i]->object); + + ret = parse_flow_flow_action(maction, false, &flow_act); + if (ret) + goto err_out; + flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, + arr_flow_actions[i]->object); + } + + ret = uverbs_copy_from(&flow_context.flow_tag, attrs, + MLX5_IB_ATTR_CREATE_FLOW_TAG); + if (!ret) { + if (flow_context.flow_tag >= BIT(24)) { + ret = -EINVAL; + goto err_out; + } + flow_context.flags |= FLOW_CONTEXT_HAS_TAG; + } + + flow_handler = + raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); + if (IS_ERR(flow_handler)) { + ret = PTR_ERR(flow_handler); + goto err_out; + } + + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); + + return 0; +err_out: + ib_uverbs_flow_resources_free(uflow_res); + return ret; +} + +static int flow_matcher_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_matcher *obj = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); + if (ret) + return ret; + + kfree(obj); + return 0; +} + +static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *obj) +{ + enum mlx5_ib_uapi_flow_table_type ft_type = + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; + u32 flags; + int err; + + /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older + * users should switch to it. We leave this to not break userspace + */ + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) + return -EINVAL; + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { + err = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); + if (err) + return err; + + return 0; + } + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + return err; + + if (flags) { + mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, + &obj->ns_type); + return 0; + } + } + + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); + struct mlx5_ib_flow_matcher *obj; + int err; + + obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->mask_len = uverbs_attr_get_len( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + err = uverbs_copy_from(&obj->matcher_mask, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + if (err) + goto end; + + obj->flow_type = uverbs_attr_get_enum_id( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + + if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { + err = uverbs_copy_from(&obj->priority, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + if (err) + goto end; + } + + err = uverbs_copy_from(&obj->match_criteria_enable, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); + if (err) + goto end; + + err = mlx5_ib_matcher_ns(attrs, obj); + if (err) + goto end; + + uobj->object = obj; + obj->mdev = dev->mdev; + atomic_set(&obj->usecnt, 0); + return 0; + +end: + kfree(obj); + return err; +} + +static struct ib_flow_action * +mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, + enum mlx5_ib_uapi_flow_table_type ft_type, + u8 num_actions, void *in) +{ + enum mlx5_flow_namespace_type namespace; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ERR_PTR(-EINVAL); + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return ERR_PTR(-ENOMEM); + + maction->flow_action_raw.modify_hdr = + mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); + + if (IS_ERR(maction->flow_action_raw.modify_hdr)) { + ret = PTR_ERR(maction->flow_action_raw.modify_hdr); + kfree(maction); + return ERR_PTR(ret); + } + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; + maction->flow_action_raw.dev = dev; + + return &maction->ib_action; +} + +static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + max_modify_header_actions); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); + enum mlx5_ib_uapi_flow_table_type ft_type; + struct ib_flow_action *action; + int num_actions; + void *in; + int ret; + + if (!mlx5_ib_modify_header_supported(mdev)) + return -EOPNOTSUPP; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); + if (num_actions < 0) + return num_actions; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); + if (ret) + return ret; + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); + if (IS_ERR(action)) + return PTR_ERR(action); + + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, + IB_FLOW_ACTION_UNSPECIFIED); + + return 0; +} + +static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, + u8 packet_reformat_type, + u8 ft_type) +{ + switch (packet_reformat_type) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE(ibdev->mdev, + encap_general_header); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, + reformat_l2_to_l3_tunnel); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, + reformat_l3_tunnel_to_l2); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); + break; + default: + break; + } + + return false; +} + +static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) +{ + switch (dv_prt) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_ib_flow_action_create_packet_reformat_ctx( + struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_action *maction, + u8 ft_type, u8 dv_prt, + void *in, size_t len) +{ + enum mlx5_flow_namespace_type namespace; + u8 prm_prt; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ret; + + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); + if (ret) + return ret; + + maction->flow_action_raw.pkt_reformat = + mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace); + if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { + ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); + return ret; + } + + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; + maction->flow_action_raw.dev = dev; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); + enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; + enum mlx5_ib_uapi_flow_table_type ft_type; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); + if (ret) + return ret; + + ret = uverbs_get_const(&dv_prt, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); + if (ret) + return ret; + + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) + return -EOPNOTSUPP; + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return -ENOMEM; + + if (dv_prt == + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_DECAP; + maction->flow_action_raw.dev = mdev; + } else { + void *in; + int len; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto free_maction; + } + + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, + maction, ft_type, dv_prt, in, len); + if (ret) + goto free_maction; + } + + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, + IB_FLOW_ACTION_UNSPECIFIED); + return 0; + +free_maction: + kfree(maction); + return ret; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_CREATE_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_READ, 1, + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + enum mlx5_ib_create_flow_flags, + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DESTROY_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_fs, + UVERBS_OBJECT_FLOW, + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( + set_add_copy_action_in_auto)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, + UVERBS_ATTR_MIN_SIZE(1), + UA_ALLOC_AND_COPY, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + enum mlx5_ib_uapi_flow_action_packet_reformat_type, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +ADD_UVERBS_METHODS( + mlx5_ib_flow_actions, + UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + mlx5_ib_flow_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + UVERBS_ATTR_TYPE(u8), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + enum ib_flow_flags, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_FLOW_MATCHER), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; + +static const struct ib_device_ops flow_ops = { + .create_flow = mlx5_ib_create_flow, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, +}; + +static const struct ib_device_ops flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + + ib_set_device_ops(&dev->ib_dev, &flow_ops); + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h new file mode 100644 index 000000000000..ad320adaf321 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_FS_H +#define _MLX5_IB_FS_H + +#include "mlx5_ib.h" + +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); +#else +static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + return 0; +} +#endif +static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) +{ + kfree(dev->flow_db); +} +#endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6f99ed03d88e..fbc45a5e76c5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. */ #include <linux/debugfs.h> @@ -59,10 +32,13 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "devx.h" +#include "fs.h" #include "srq.h" #include "qp.h" #include "wr.h" -#include <linux/mlx5/fs_helpers.h> +#include "restrack.h" +#include "counters.h" #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_verbs.h> @@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, *native_port_num = 1; port = &ibdev->port[ib_port_num - 1]; - if (!port) - return NULL; - spin_lock(&port->mp.mpi_lock); mpi = ibdev->port[ib_port_num - 1].mp.mpi; if (mpi && !mpi->unaffiliate) { @@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, mlx5_ib_disable_lb(dev, true, false); } +static int set_ucontext_resp(struct ib_ucontext *uctx, + struct mlx5_ib_alloc_ucontext_resp *resp) +{ + struct ib_device *ibdev = uctx->device; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_ucontext *context = to_mucontext(uctx); + struct mlx5_bfreg_info *bfregi = &context->bfregi; + int err; + + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, + &resp->dump_fill_mkey); + if (err) + return err; + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + + resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + if (dev->wc_support) + resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, + log_bf_reg_size); + resp->cache_line_size = cache_line_size(); + resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp->cqe_version = context->cqe_version; + resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; + resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_CAP_GEN(dev->mdev, + num_of_uars_per_page) : 1; + + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) { + if (mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_EGRESS)) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; + if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; + /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ + } + + resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : + bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; + resp->num_ports = dev->num_ports; + resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; + + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { + mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline); + resp->eth_min_inline++; + } + + if (dev->mdev->clock_info) + resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); + + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096) { + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp->hca_core_clock_offset = + offsetof(struct mlx5_init_seg, + internal_timer_h) % PAGE_SIZE; + } + + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + + resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; + return 0; +} + static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); - u32 dump_fill_mkey; bool lib_uar_4k; bool lib_uar_dyn; @@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) return -EINVAL; - resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (dev->wc_support) - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); - resp.cache_line_size = cache_line_size(); - resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); - resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); - resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - resp.cqe_version = min_t(__u8, - (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), - req.max_cqe_version); - resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; - resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ - } - lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; @@ -1887,87 +1913,24 @@ uar_done: if (err) goto out_devx; - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); - if (err) - goto out_mdev; - } - INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs; - resp.num_ports = dev->num_ports; - - if (offsetofend(typeof(resp), cqe_version) <= udata->outlen) - resp.response_length += sizeof(resp.cqe_version); - - if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | - MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; - resp.response_length += sizeof(resp.cmds_supp_uhw); - } - - if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) { - if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { - mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); - resp.eth_min_inline++; - } - resp.response_length += sizeof(resp.eth_min_inline); - } - - if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) { - if (mdev->clock_info) - resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); - resp.response_length += sizeof(resp.clock_info_versions); - } - - /* - * We don't want to expose information from the PCI bar that is located - * after 4096 bytes, so if the arch only supports larger pages, let's - * pretend we don't support reading the HCA's core clock. This is also - * forced by mmap function. - */ - if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) { - if (PAGE_SIZE <= 4096) { - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; - resp.hca_core_clock_offset = - offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; - } - resp.response_length += sizeof(resp.hca_core_clock_offset); - } - - if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen) - resp.response_length += sizeof(resp.log_uar_size); - - if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen) - resp.response_length += sizeof(resp.num_uars_per_page); - - if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) { - resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; - resp.response_length += sizeof(resp.num_dyn_bfregs); - } - - if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) { - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - resp.dump_fill_mkey = dump_fill_mkey; - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; - } - resp.response_length += sizeof(resp.dump_fill_mkey); - } + context->cqe_version = min_t(__u8, + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), + req.max_cqe_version); - if (MLX5_CAP_GEN(dev->mdev, ece_support)) - resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + err = set_ucontext_resp(uctx, &resp); + if (err) + goto out_mdev; + resp.response_length = min(udata->outlen, sizeof(resp)); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; - context->cqe_version = resp.cqe_version; context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); @@ -2000,6 +1963,29 @@ out_ctx: return err; } +static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_alloc_ucontext_resp uctx_resp = {}; + int ret; + + ret = set_ucontext_resp(ibcontext, &uctx_resp); + if (ret) + return ret; + + uctx_resp.response_length = + min_t(size_t, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX), + sizeof(uctx_resp)); + + ret = uverbs_copy_to_struct_or_zero(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + &uctx_resp, + sizeof(uctx_resp)); + return ret; +} + static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } -enum { - MATCH_CRITERIA_ENABLE_OUTER_BIT, - MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT, - MATCH_CRITERIA_ENABLE_MISC2_BIT -}; - -#define HEADER_IS_ZERO(match_criteria, headers) \ - !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ - 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ - -static u8 get_match_criteria_enable(u32 *match_criteria) -{ - u8 match_criteria_enable; - - match_criteria_enable = - (!HEADER_IS_ZERO(match_criteria, outer_headers)) << - MATCH_CRITERIA_ENABLE_OUTER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << - MATCH_CRITERIA_ENABLE_MISC_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, inner_headers)) << - MATCH_CRITERIA_ENABLE_INNER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << - MATCH_CRITERIA_ENABLE_MISC2_BIT; - - return match_criteria_enable; -} - -static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - u8 entry_mask; - u8 entry_val; - int err = 0; - - if (!mask) - goto out; - - entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, - ip_protocol); - entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, - ip_protocol); - if (!entry_mask) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); - goto out; - } - /* Don't override existing ip protocol */ - if (mask != entry_mask || val != entry_val) - err = -EINVAL; -out: - return err; -} - -static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, - bool inner) -{ - if (inner) { - MLX5_SET(fte_match_set_misc, - misc_c, inner_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, inner_ipv6_flow_label, val); - } else { - MLX5_SET(fte_match_set_misc, - misc_c, outer_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, outer_ipv6_flow_label, val); - } -} - -static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); -} - -static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) -{ - if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) - return -EOPNOTSUPP; - - return 0; -} - -#define LAST_ETH_FIELD vlan_tag -#define LAST_IB_FIELD sl -#define LAST_IPV4_FIELD tos -#define LAST_IPV6_FIELD traffic_class -#define LAST_TCP_UDP_FIELD src_port -#define LAST_TUNNEL_FIELD tunnel_id -#define LAST_FLOW_TAG_FIELD tag_id -#define LAST_DROP_FIELD size -#define LAST_COUNTERS_FIELD counters - -/* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) - -int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, - bool is_egress, - struct mlx5_flow_act *action) -{ - - switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ? - MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; - case IB_FLOW_ACTION_UNSPECIFIED: - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - action->modify_hdr = - maction->flow_action_raw.modify_hdr; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_DECAP) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { - if (action->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - return -EINVAL; - action->action |= - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - action->pkt_reformat = - maction->flow_action_raw.pkt_reformat; - return 0; - } - /* fall through */ - default: - return -EOPNOTSUPP; - } -} - -static int parse_flow_attr(struct mlx5_core_dev *mdev, - struct mlx5_flow_spec *spec, - const union ib_flow_spec *ib_spec, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action, u32 prev_type) -{ - struct mlx5_flow_context *flow_context = &spec->flow_context; - u32 *match_c = spec->match_criteria; - u32 *match_v = spec->match_value; - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters); - void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters_2); - void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters_2); - void *headers_c; - void *headers_v; - int match_ipv; - int ret; - - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - inner_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - inner_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version); - } else { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - } - - switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { - case IB_FLOW_SPEC_ETH: - if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) - return -EOPNOTSUPP; - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dmac_47_16), - ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), - ib_spec->eth.val.dst_mac); - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - smac_47_16), - ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - smac_47_16), - ib_spec->eth.val.src_mac); - - if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - cvlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - cvlan_tag, 1); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_cfi, - ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_cfi, - ntohs(ib_spec->eth.val.vlan_tag) >> 12); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_prio, - ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_prio, - ntohs(ib_spec->eth.val.vlan_tag) >> 13); - } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ntohs(ib_spec->eth.val.ether_type)); - break; - case IB_FLOW_SPEC_IPV4: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV4_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IP); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.src_ip, - sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.src_ip, - sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.dst_ip, - sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.dst_ip, - sizeof(ib_spec->ipv4.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, - ib_spec->ipv4.val.proto)) - return -EINVAL; - break; - case IB_FLOW_SPEC_IPV6: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV6_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IPV6); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.src_ip, - sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.src_ip, - sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.dst_ip, - sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.dst_ip, - sizeof(ib_spec->ipv6.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv6.mask.traffic_class, - ib_spec->ipv6.val.traffic_class); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr)) - return -EINVAL; - - set_flow_label(misc_params_c, misc_params_v, - ntohl(ib_spec->ipv6.mask.flow_label), - ntohl(ib_spec->ipv6.val.flow_label), - ib_spec->type & IB_FLOW_SPEC_INNER); - break; - case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; - case IB_FLOW_SPEC_TCP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_UDP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_GRE: - if (ib_spec->gre.mask.c_ks_res0_ver) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_GRE); - - MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - ntohs(ib_spec->gre.mask.protocol)); - MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, - ntohs(ib_spec->gre.val.protocol)); - - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key.nvgre.hi), - &ib_spec->gre.mask.key, - sizeof(ib_spec->gre.mask.key)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key.nvgre.hi), - &ib_spec->gre.val.key, - sizeof(ib_spec->gre.val.key)); - break; - case IB_FLOW_SPEC_MPLS: - switch (prev_type) { - case IB_FLOW_SPEC_UDP: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_udp), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - case IB_FLOW_SPEC_GRE: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_gre), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - default: - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - inner_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - inner_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } else { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } - } - break; - case IB_FLOW_SPEC_VXLAN_TUNNEL: - if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, - LAST_TUNNEL_FIELD)) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, - ntohl(ib_spec->tunnel.mask.tunnel_id)); - MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, - ntohl(ib_spec->tunnel.val.tunnel_id)); - break; - case IB_FLOW_SPEC_ACTION_TAG: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, - LAST_FLOW_TAG_FIELD)) - return -EOPNOTSUPP; - if (ib_spec->flow_tag.tag_id >= BIT(24)) - return -EINVAL; - - flow_context->flow_tag = ib_spec->flow_tag.tag_id; - flow_context->flags |= FLOW_CONTEXT_HAS_TAG; - break; - case IB_FLOW_SPEC_ACTION_DROP: - if (FIELDS_NOT_SUPPORTED(ib_spec->drop, - LAST_DROP_FIELD)) - return -EOPNOTSUPP; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - break; - case IB_FLOW_SPEC_ACTION_HANDLE: - ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), - flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); - if (ret) - return ret; - break; - case IB_FLOW_SPEC_ACTION_COUNT: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, - LAST_COUNTERS_FIELD)) - return -EOPNOTSUPP; - - /* for now support only one counters spec per flow */ - if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - return -EINVAL; - - action->counters = ib_spec->flow_count.counters; - action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* If a flow could catch both multicast and unicast packets, - * it won't fall into the multicast flow steering table and this rule - * could steal other multicast packets. - */ -static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) -{ - union ib_flow_spec *flow_spec; - - if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->num_of_specs < 1) - return false; - - flow_spec = (union ib_flow_spec *)(ib_attr + 1); - if (flow_spec->type == IB_FLOW_SPEC_IPV4) { - struct ib_flow_spec_ipv4 *ipv4_spec; - - ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; - if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) - return true; - - return false; - } - - if (flow_spec->type == IB_FLOW_SPEC_ETH) { - struct ib_flow_spec_eth *eth_spec; - - eth_spec = (struct ib_flow_spec_eth *)flow_spec; - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); - } - - return false; -} - -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. - */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? - VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - -static bool is_valid_ethertype(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr, - bool check_inner) -{ - union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); - int match_ipv = check_inner ? - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version) : - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; - bool ipv4_spec_valid, ipv6_spec_valid; - unsigned int ip_spec_type = 0; - bool has_ethertype = false; - unsigned int spec_index; - bool mask_valid = true; - u16 eth_type = 0; - bool type_valid; - - /* Validate that ethertype is correct */ - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && - ib_spec->eth.mask.ether_type) { - mask_valid = (ib_spec->eth.mask.ether_type == - htons(0xffff)); - has_ethertype = true; - eth_type = ntohs(ib_spec->eth.val.ether_type); - } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || - (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { - ip_spec_type = ib_spec->type; - } - ib_spec = (void *)ib_spec + ib_spec->size; - } - - type_valid = (!has_ethertype) || (!ip_spec_type); - if (!type_valid && mask_valid) { - ipv4_spec_valid = (eth_type == ETH_P_IP) && - (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); - ipv6_spec_valid = (eth_type == ETH_P_IPV6) && - (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); - - type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || - (((eth_type == ETH_P_MPLS_UC) || - (eth_type == ETH_P_MPLS_MC)) && match_ipv); - } - - return type_valid; -} - -static bool is_valid_attr(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr) -{ - return is_valid_ethertype(mdev, flow_attr, false) && - is_valid_ethertype(mdev, flow_attr, true); -} - -static void put_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *prio, bool ft_added) -{ - prio->refcount -= !!ft_added; - if (!prio->refcount) { - mlx5_destroy_flow_table(prio->flow_table); - prio->flow_table = NULL; - } -} - -static void counters_clear_description(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - mutex_lock(&mcounters->mcntrs_mutex); - kfree(mcounters->counters_data); - mcounters->counters_data = NULL; - mcounters->cntrs_max_index = 0; - mutex_unlock(&mcounters->mcntrs_mutex); -} - -static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) -{ - struct mlx5_ib_flow_handler *handler = container_of(flow_id, - struct mlx5_ib_flow_handler, - ibflow); - struct mlx5_ib_flow_handler *iter, *tmp; - struct mlx5_ib_dev *dev = handler->dev; - - mutex_lock(&dev->flow_db->lock); - - list_for_each_entry_safe(iter, tmp, &handler->list, list) { - mlx5_del_flow_rules(iter->rule); - put_flow_table(dev, iter->prio, true); - list_del(&iter->list); - kfree(iter); - } - - mlx5_del_flow_rules(handler->rule); - put_flow_table(dev, handler->prio, true); - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - - mutex_unlock(&dev->flow_db->lock); - if (handler->flow_matcher) - atomic_dec(&handler->flow_matcher->usecnt); - kfree(handler); - - return 0; -} - -static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) -{ - priority *= 2; - if (!dont_trap) - priority++; - return priority; -} - -enum flow_table_type { - MLX5_IB_FT_RX, - MLX5_IB_FT_TX -}; - -#define MLX5_FS_MAX_TYPES 6 -#define MLX5_FS_MAX_ENTRIES BIT(16) - -static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, - struct mlx5_ib_flow_prio *prio, - int priority, - int num_entries, int num_groups, - u32 flags) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *ft; - - ft_attr.prio = priority; - ft_attr.max_fte = num_entries; - ft_attr.flags = flags; - ft_attr.autogroup.max_num_groups = num_groups; - ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ft)) - return ERR_CAST(ft); - - prio->flow_table = ft; - prio->refcount = 0; - return prio; -} - -static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr, - enum flow_table_type ft_type) -{ - bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio; - struct mlx5_flow_table *ft; - int max_table_size; - int num_entries; - int num_groups; - bool esw_encap; - u32 flags = 0; - int priority; - - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(flow_attr->priority, - dont_trap); - if (ft_type == MLX5_IB_FT_RX) { - fn_type = MLX5_FLOW_NAMESPACE_BYPASS; - prio = &dev->flow_db->prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); - fn_type = MLX5_FLOW_NAMESPACE_EGRESS; - prio = &dev->flow_db->egress_prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } - ns = mlx5_get_flow_namespace(dev->mdev, fn_type); - num_entries = MLX5_FS_MAX_ENTRIES; - num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - ns = mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); - prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - if (!MLX5_CAP_FLOWTABLE(dev->mdev, - allow_sniffer_and_nic_rx_shared_tir)) - return ERR_PTR(-ENOTSUPP); - - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? - MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); - - prio = &dev->flow_db->sniffer[ft_type]; - priority = 0; - num_entries = 1; - num_groups = 1; - } - - if (!ns) - return ERR_PTR(-ENOTSUPP); - - max_table_size = min_t(int, num_entries, max_table_size); - - ft = prio->flow_table; - if (!ft) - return _get_prio(ns, prio, priority, max_table_size, num_groups, - flags); - - return prio; -} - -static void set_underlay_qp(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - u32 underlay_qpn) -{ - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - if (underlay_qpn && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - ft_field_support.bth_dst_qp)) { - MLX5_SET(fte_match_set_misc, - misc_params_v, bth_dst_qp, underlay_qpn); - MLX5_SET(fte_match_set_misc, - misc_params_c, bth_dst_qp, 0xffffff); - } -} - -static int read_flow_counters(struct ib_device *ibdev, - struct mlx5_read_counters_attr *read_attr) -{ - struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_fc_query(dev->mdev, fc, - &read_attr->out[IB_COUNTER_PACKETS], - &read_attr->out[IB_COUNTER_BYTES]); -} - -/* flow counters currently expose two counters packets and bytes */ -#define FLOW_COUNTERS_NUM 2 -static int counters_set_description(struct ib_counters *counters, - enum mlx5_ib_counters_type counters_type, - struct mlx5_ib_flow_counters_desc *desc_data, - u32 ncounters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - u32 cntrs_max_index = 0; - int i; - - if (counters_type != MLX5_IB_COUNTERS_FLOW) - return -EINVAL; - - /* init the fields for the object */ - mcounters->type = counters_type; - mcounters->read_counters = read_flow_counters; - mcounters->counters_num = FLOW_COUNTERS_NUM; - mcounters->ncounters = ncounters; - /* each counter entry have both description and index pair */ - for (i = 0; i < ncounters; i++) { - if (desc_data[i].description > IB_COUNTER_BYTES) - return -EINVAL; - - if (cntrs_max_index <= desc_data[i].index) - cntrs_max_index = desc_data[i].index + 1; - } - - mutex_lock(&mcounters->mcntrs_mutex); - mcounters->counters_data = desc_data; - mcounters->cntrs_max_index = cntrs_max_index; - mutex_unlock(&mcounters->mcntrs_mutex); - - return 0; -} - -#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) -static int flow_counters_set_data(struct ib_counters *ibcounters, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); - struct mlx5_ib_flow_counters_data *cntrs_data = NULL; - struct mlx5_ib_flow_counters_desc *desc_data = NULL; - bool hw_hndl = false; - int ret = 0; - - if (ucmd && ucmd->ncounters_data != 0) { - cntrs_data = ucmd->data; - if (cntrs_data->ncounters > MAX_COUNTERS_NUM) - return -EINVAL; - - desc_data = kcalloc(cntrs_data->ncounters, - sizeof(*desc_data), - GFP_KERNEL); - if (!desc_data) - return -ENOMEM; - - if (copy_from_user(desc_data, - u64_to_user_ptr(cntrs_data->counters_data), - sizeof(*desc_data) * cntrs_data->ncounters)) { - ret = -EFAULT; - goto free; - } - } - - if (!mcounters->hw_cntrs_hndl) { - mcounters->hw_cntrs_hndl = mlx5_fc_create( - to_mdev(ibcounters->device)->mdev, false); - if (IS_ERR(mcounters->hw_cntrs_hndl)) { - ret = PTR_ERR(mcounters->hw_cntrs_hndl); - goto free; - } - hw_hndl = true; - } - - if (desc_data) { - /* counters already bound to at least one flow */ - if (mcounters->cntrs_max_index) { - ret = -EINVAL; - goto free_hndl; - } - - ret = counters_set_description(ibcounters, - MLX5_IB_COUNTERS_FLOW, - desc_data, - cntrs_data->ncounters); - if (ret) - goto free_hndl; - - } else if (!mcounters->cntrs_max_index) { - /* counters not bound yet, must have udata passed */ - ret = -EINVAL; - goto free_hndl; - } - - return 0; - -free_hndl: - if (hw_hndl) { - mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, - mcounters->hw_cntrs_hndl); - mcounters->hw_cntrs_hndl = NULL; - } -free: - kfree(desc_data); - return ret; -} - -static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - void *misc; - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(esw, - rep->vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_mask()); - } else { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - } -} - -static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst, - u32 underlay_qpn, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_flow_table *ft = ft_prio->flow_table; - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *spec; - struct mlx5_flow_destination dest_arr[2] = {}; - struct mlx5_flow_destination *rule_dst = dest_arr; - const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); - unsigned int spec_index; - u32 prev_type = 0; - int err = 0; - int dest_num = 0; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - - if (!is_valid_attr(dev->mdev, flow_attr)) - return ERR_PTR(-EINVAL); - - if (dev->is_rep && is_egress) - return ERR_PTR(-EINVAL); - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(dev->mdev, spec, - ib_flow, flow_attr, &flow_act, - prev_type); - if (err < 0) - goto free; - - prev_type = ((union ib_flow_spec *)ib_flow)->type; - ib_flow += ((union ib_flow_spec *)ib_flow)->size; - } - - if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } - - if (!flow_is_multicast_only(flow_attr)) - set_underlay_qp(dev, spec, underlay_qpn); - - if (dev->is_rep) { - struct mlx5_eswitch_rep *rep; - - rep = dev->port[flow_attr->port - 1].rep; - if (!rep) { - err = -EINVAL; - goto free; - } - - mlx5_ib_set_rule_source_port(dev, spec, rep); - } - - spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { - err = -EINVAL; - goto free; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - struct mlx5_ib_mcounters *mcounters; - - err = flow_counters_set_data(flow_act.counters, ucmd); - if (err) - goto free; - - mcounters = to_mcounters(flow_act.counters); - handler->ibcounters = flow_act.counters; - dest_arr[dest_num].type = - MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); - dest_num++; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!dest_num) - rule_dst = NULL; - } else { - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) - flow_act.action |= - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - if (is_egress) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - else if (dest_num) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } - - if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", - spec->flow_context.flow_tag, flow_attr->type); - err = -EINVAL; - goto free; - } - handler->rule = mlx5_add_flow_rules(ft, spec, - &flow_act, - rule_dst, dest_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - - ft_prio->flow_table = ft; -free: - if (err && handler) { - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - kfree(handler); - } - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); -} - -enum { - LEFTOVERS_MC, - LEFTOVERS_UC, -}; - -static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_ucast = NULL; - struct mlx5_ib_flow_handler *handler = NULL; - - static struct { - struct ib_flow_attr flow_attr; - struct ib_flow_spec_eth eth_flow; - } leftovers_specs[] = { - [LEFTOVERS_MC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {0x1} } - } - }, - [LEFTOVERS_UC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {} } - } - } - }; - - handler = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_MC].flow_attr, - dst); - if (!IS_ERR(handler) && - flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { - handler_ucast = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_UC].flow_attr, - dst); - if (IS_ERR(handler_ucast)) { - mlx5_del_flow_rules(handler->rule); - ft_prio->refcount--; - kfree(handler); - handler = handler_ucast; - } else { - list_add(&handler_ucast->list, &handler->list); - } - } - - return handler; -} - -static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_rx, - struct mlx5_ib_flow_prio *ft_tx, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_rx; - struct mlx5_ib_flow_handler *handler_tx; - int err; - static const struct ib_flow_attr flow_attr = { - .num_of_specs = 0, - .size = sizeof(flow_attr) - }; - - handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); - if (IS_ERR(handler_rx)) { - err = PTR_ERR(handler_rx); - goto err; - } - - handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); - if (IS_ERR(handler_tx)) { - err = PTR_ERR(handler_tx); - goto err_tx; - } - - list_add(&handler_tx->list, &handler_rx->list); - - return handler_rx; - -err_tx: - mlx5_del_flow_rules(handler_rx->rule); - ft_rx->refcount--; - kfree(handler_rx); -err: - return ERR_PTR(err); -} - -static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, - struct ib_udata *udata) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_qp *mqp = to_mqp(qp); - struct mlx5_ib_flow_handler *handler = NULL; - struct mlx5_flow_destination *dst = NULL; - struct mlx5_ib_flow_prio *ft_prio_tx = NULL; - struct mlx5_ib_flow_prio *ft_prio; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; - size_t min_ucmd_sz, required_ucmd_sz; - int err; - int underlay_qpn; - - if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); - if (udata->inlen < min_ucmd_sz) - return ERR_PTR(-EOPNOTSUPP); - - err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); - if (err) - return ERR_PTR(err); - - /* currently supports only one counters data */ - if (ucmd_hdr.ncounters_data > 1) - return ERR_PTR(-EINVAL); - - required_ucmd_sz = min_ucmd_sz + - sizeof(struct mlx5_ib_flow_counters_data) * - ucmd_hdr.ncounters_data; - if (udata->inlen > required_ucmd_sz && - !ib_is_udata_cleared(udata, required_ucmd_sz, - udata->inlen - required_ucmd_sz)) - return ERR_PTR(-EOPNOTSUPP); - - ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); - if (!ucmd) - return ERR_PTR(-ENOMEM); - - err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); - if (err) - goto free_ucmd; - } - - if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { - err = -ENOMEM; - goto free_ucmd; - } - - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { - err = -EINVAL; - goto free_ucmd; - } - - if (is_egress && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - err = -EINVAL; - goto free_ucmd; - } - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (!dst) { - err = -ENOMEM; - goto free_ucmd; - } - - mutex_lock(&dev->flow_db->lock); - - ft_prio = get_flow_table(dev, flow_attr, - is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); - if (IS_ERR(ft_prio_tx)) { - err = PTR_ERR(ft_prio_tx); - ft_prio_tx = NULL; - goto destroy_ft; - } - } - - if (is_egress) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->is_rss) - dst->tir_num = mqp->rss_qp.tirn; - else - dst->tir_num = mqp->raw_packet_qp.rq.tirn; - } - - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? - mqp->underlay_qpn : - 0; - handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, - underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { - err = -EINVAL; - goto destroy_ft; - } - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - handler = NULL; - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - kfree(ucmd); - - return &handler->ibflow; - -destroy_ft: - put_flow_table(dev, ft_prio, false); - if (ft_prio_tx) - put_flow_table(dev, ft_prio_tx, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); -free_ucmd: - kfree(ucmd); - return ERR_PTR(err); -} - -static struct mlx5_ib_flow_prio * -_get_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - bool mcast) -{ - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio = NULL; - int max_table_size = 0; - bool esw_encap; - u32 flags = 0; - int priority; - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); - - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2) && - !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - max_table_size = BIT( - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { - max_table_size = BIT( - MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && - esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } - - max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - - ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); - if (!ns) - return ERR_PTR(-ENOTSUPP); - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) - prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) - prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) - prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) - prio = &dev->flow_db->rdma_tx[priority]; - - if (!prio) - return ERR_PTR(-EINVAL); - - if (prio->flow_table) - return prio; - - return _get_prio(ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); -} - -static struct mlx5_ib_flow_handler * -_create_raw_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct mlx5_flow_destination *dst, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen, - int dst_num) -{ - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_spec *spec; - struct mlx5_flow_table *ft = ft_prio->flow_table; - int err = 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - memcpy(spec->match_value, cmd_in, inlen); - memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, - fs_matcher->mask_len); - spec->match_criteria_enable = fs_matcher->match_criteria_enable; - spec->flow_context = *flow_context; - - handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, dst_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - ft_prio->flow_table = ft; - -free: - if (err) - kfree(handler); - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, - void *match_v) -{ - void *match_c; - void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; - void *dmac, *dmac_mask; - void *ipv4, *ipv4_mask; - - if (!(fs_matcher->match_criteria_enable & - (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) - return false; - - match_c = fs_matcher->matcher_mask.match_params; - match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - - dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dmac_47_16); - dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dmac_47_16); - - if (is_multicast_ether_addr(dmac) && - is_multicast_ether_addr(dmac_mask)) - return true; - - ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (ipv4_is_multicast(*(__be32 *)(ipv4)) && - ipv4_is_multicast(*(__be32 *)(ipv4_mask))) - return true; - - return false; -} - -struct mlx5_ib_flow_handler * -mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - u32 counter_id, - void *cmd_in, int inlen, int dest_id, - int dest_type) -{ - struct mlx5_flow_destination *dst; - struct mlx5_ib_flow_prio *ft_prio; - struct mlx5_ib_flow_handler *handler; - int dst_num = 0; - bool mcast; - int err; - - if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) - return ERR_PTR(-EOPNOTSUPP); - - if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) - return ERR_PTR(-ENOMEM); - - dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); - - mcast = raw_fs_is_multicast(fs_matcher, cmd_in); - mutex_lock(&dev->flow_db->lock); - - ft_prio = _get_flow_table(dev, fs_matcher, mcast); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst[dst_num].type = dest_type; - dst[dst_num++].tir_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst[dst_num++].ft_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { - dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - } - - - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; - dst_num++; - } - - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, - flow_context, flow_act, - cmd_in, inlen, dst_num); - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - atomic_inc(&fs_matcher->usecnt); - handler->flow_matcher = fs_matcher; - - kfree(dst); - - return handler; - -destroy_ft: - put_flow_table(dev, ft_prio, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - - return ERR_PTR(err); -} - -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. - */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return -EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. - */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - -static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - - switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. - */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; - case IB_FLOW_ACTION_UNSPECIFIED: - mlx5_ib_destroy_flow_action_raw(maction); - break; - default: - WARN_ON(true); - break; - } - - kfree(maction); - return 0; -} - static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) return __get_port_caps(dev, port); } -static void destroy_umrc_res(struct mlx5_ib_dev *dev) -{ - int err; - - err = mlx5_mr_cache_cleanup(dev); - if (err) - mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - - if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp, NULL); - if (dev->umrc.cq) - ib_free_cq(dev->umrc.cq); - if (dev->umrc.pd) - ib_dealloc_pd(dev->umrc.pd); -} - -enum { - MAX_UMR_WR = 128, -}; - -static int create_umr_res(struct mlx5_ib_dev *dev) -{ - struct ib_qp_init_attr *init_attr = NULL; - struct ib_qp_attr *attr = NULL; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; - int ret; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto error_0; - } - - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - ret = PTR_ERR(pd); - goto error_0; - } - - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); - if (IS_ERR(cq)) { - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); - ret = PTR_ERR(cq); - goto error_2; - } - - init_attr->send_cq = cq; - init_attr->recv_cq = cq; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->cap.max_send_wr = MAX_UMR_WR; - init_attr->cap.max_send_sge = 1; - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; - init_attr->port_num = 1; - qp = mlx5_ib_create_qp(pd, init_attr, NULL); - if (IS_ERR(qp)) { - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); - ret = PTR_ERR(qp); - goto error_3; - } - qp->device = &dev->ib_dev; - qp->real_qp = qp; - qp->uobject = NULL; - qp->qp_type = MLX5_IB_QPT_REG_UMR; - qp->send_cq = init_attr->send_cq; - qp->recv_cq = init_attr->recv_cq; - - attr->qp_state = IB_QPS_INIT; - attr->port_num = 1; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | - IB_QP_PORT, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTR; - attr->path_mtu = IB_MTU_256; - - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTS; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); - goto error_4; - } - - dev->umrc.qp = qp; - dev->umrc.cq = cq; - dev->umrc.pd = pd; - - sema_init(&dev->umrc.sem, MAX_UMR_WR); - ret = mlx5_mr_cache_init(dev); - if (ret) { - mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); - goto error_4; - } - - kfree(attr); - kfree(init_attr); - - return 0; - -error_4: - mlx5_ib_destroy_qp(qp, NULL); - dev->umrc.qp = NULL; - -error_3: - ib_free_cq(cq); - dev->umrc.cq = NULL; - -error_2: - ib_dealloc_pd(pd); - dev->umrc.pd = NULL; - -error_0: - kfree(attr); - kfree(init_attr); - return ret; -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap) } } -static int create_dev_resources(struct mlx5_ib_resources *devr) +static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; struct ib_srq_init_attr attr; - struct mlx5_ib_dev *dev; struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; - dev = container_of(devr, struct mlx5_ib_dev, devr); ibdev = &dev->ib_dev; + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + return -EOPNOTSUPP; + mutex_init(&devr->mutex); devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); @@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) if (ret) goto err_create_cq; - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x0)) { - ret = PTR_ERR(devr->x0); + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); + if (ret) goto error2; - } - devr->x0->device = &dev->ib_dev; - devr->x0->inode = NULL; - atomic_set(&devr->x0->usecnt, 0); - mutex_init(&devr->x0->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x1)) { - ret = PTR_ERR(devr->x1); + + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); + if (ret) goto error3; - } - devr->x1->device = &dev->ib_dev; - devr->x1->inode = NULL; - atomic_set(&devr->x1->usecnt, 0); - mutex_init(&devr->x1->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x1->tgt_qp_list); memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; attr.ext.cq = devr->c0; - attr.ext.xrc.xrcd = devr->x0; devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); if (!devr->s0) { @@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; devr->s0->srq_type = IB_SRQT_XRC; - devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.cq = devr->c0; ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); if (ret) goto err_create; - atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -5116,9 +3142,9 @@ error5: err_create: kfree(devr->s0); error4: - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); error3: - mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); error2: mlx5_ib_destroy_cq(devr->c0, NULL); err_create_cq: @@ -5130,16 +3156,17 @@ error0: return ret; } -static void destroy_dev_resources(struct mlx5_ib_resources *devr) +static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; int port; mlx5_ib_destroy_srq(devr->s1, NULL); kfree(devr->s1); mlx5_ib_destroy_srq(devr->s0, NULL); kfree(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0, NULL); - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); mlx5_ib_destroy_cq(devr->c0, NULL); kfree(devr->c0); mlx5_ib_dealloc_pd(devr->p0, NULL); @@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -struct mlx5_ib_counter { - const char *name; - size_t offset; -}; - -#define INIT_Q_COUNTER(_name) \ - { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} - -static const struct mlx5_ib_counter basic_q_cnts[] = { - INIT_Q_COUNTER(rx_write_requests), - INIT_Q_COUNTER(rx_read_requests), - INIT_Q_COUNTER(rx_atomic_requests), - INIT_Q_COUNTER(out_of_buffer), -}; - -static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { - INIT_Q_COUNTER(out_of_sequence), -}; - -static const struct mlx5_ib_counter retrans_q_cnts[] = { - INIT_Q_COUNTER(duplicate_request), - INIT_Q_COUNTER(rnr_nak_retry_err), - INIT_Q_COUNTER(packet_seq_err), - INIT_Q_COUNTER(implied_nak_seq_err), - INIT_Q_COUNTER(local_ack_timeout_err), -}; - -#define INIT_CONG_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} - -static const struct mlx5_ib_counter cong_cnts[] = { - INIT_CONG_COUNTER(rp_cnp_ignored), - INIT_CONG_COUNTER(rp_cnp_handled), - INIT_CONG_COUNTER(np_ecn_marked_roce_packets), - INIT_CONG_COUNTER(np_cnp_sent), -}; - -static const struct mlx5_ib_counter extended_err_cnts[] = { - INIT_Q_COUNTER(resp_local_length_error), - INIT_Q_COUNTER(resp_cqe_error), - INIT_Q_COUNTER(req_cqe_error), - INIT_Q_COUNTER(req_remote_invalid_request), - INIT_Q_COUNTER(req_remote_access_errors), - INIT_Q_COUNTER(resp_remote_access_errors), - INIT_Q_COUNTER(resp_cqe_flush_error), - INIT_Q_COUNTER(req_cqe_flush_error), -}; - -static const struct mlx5_ib_counter roce_accl_cnts[] = { - INIT_Q_COUNTER(roce_adp_retrans), - INIT_Q_COUNTER(roce_adp_retrans_to), - INIT_Q_COUNTER(roce_slow_restart), - INIT_Q_COUNTER(roce_slow_restart_cnps), - INIT_Q_COUNTER(roce_slow_restart_trans), -}; - -#define INIT_EXT_PPCNT_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} - -static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { - INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), -}; - -static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) -{ - return MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == - MLX5_ESWITCH_OFFLOADS; -} - -static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) -{ - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - int num_cnt_ports; - int i; - - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - - MLX5_SET(dealloc_q_counter_in, in, opcode, - MLX5_CMD_OP_DEALLOC_Q_COUNTER); - - for (i = 0; i < num_cnt_ports; i++) { - if (dev->port[i].cnts.set_id) { - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, - dev->port[i].cnts.set_id); - mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); - } - kfree(dev->port[i].cnts.names); - kfree(dev->port[i].cnts.offsets); - } -} - -static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_counters *cnts) -{ - u32 num_counters; - - num_counters = ARRAY_SIZE(basic_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) - num_counters += ARRAY_SIZE(retrans_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) - num_counters += ARRAY_SIZE(extended_err_cnts); - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) - num_counters += ARRAY_SIZE(roce_accl_cnts); - - cnts->num_q_counters = num_counters; - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); - num_counters += ARRAY_SIZE(cong_cnts); - } - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); - num_counters += ARRAY_SIZE(ext_ppcnt_cnts); - } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); - if (!cnts->names) - return -ENOMEM; - - cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); - if (!cnts->offsets) - goto err_names; - - return 0; - -err_names: - kfree(cnts->names); - cnts->names = NULL; - return -ENOMEM; -} - -static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) -{ - int i; - int j = 0; - - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; - offsets[j] = basic_q_cnts[i].offset; - } - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; - offsets[j] = out_of_seq_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; - offsets[j] = retrans_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; - offsets[j] = extended_err_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; - offsets[j] = roce_accl_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; - offsets[j] = cong_cnts[i].offset; - } - } - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; - offsets[j] = ext_ppcnt_cnts[i].offset; - } - } -} - -static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) -{ - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - int num_cnt_ports; - int err = 0; - int i; - bool is_shared; - - MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); - is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - - for (i = 0; i < num_cnt_ports; i++) { - err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); - if (err) - goto err_alloc; - - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, - dev->port[i].cnts.offsets); - - MLX5_SET(alloc_q_counter_in, in, uid, - is_shared ? MLX5_SHARED_RESOURCE_UID : 0); - - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); - if (err) { - mlx5_ib_warn(dev, - "couldn't allocate queue counter for port %d, err %d\n", - i + 1, err); - goto err_alloc; - } - - dev->port[i].cnts.set_id = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); - } - return 0; - -err_alloc: - mlx5_ib_dealloc_counters(dev); - return err; -} - -static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, - u8 port_num) -{ - return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : - &dev->port[port_num].cnts; -} - -/** - * mlx5_ib_get_counters_id - Returns counters id to use for device+port - * @dev: Pointer to mlx5 IB device - * @port_num: Zero based port number - * - * mlx5_ib_get_counters_id() Returns counters set id to use for given - * device port combination in switchdev and non switchdev mode of the - * parent device. - */ -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) -{ - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); - - return cnts->set_id; -} - -static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, - u8 port_num) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts; - bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); - - if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) - return NULL; - - cnts = get_counters(dev, port_num - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats, - u16 set_id) -{ - u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; - __be32 val; - int ret, i; - - MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); - MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); - ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); - if (ret) - return ret; - - for (i = 0; i < cnts->num_q_counters; i++) { - val = *(__be32 *)((void *)out + cnts->offsets[i]); - stats->value[i] = (u64)be32_to_cpu(val); - } - - return 0; -} - -static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats) -{ - int offset = cnts->num_q_counters + cnts->num_cong_counters; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int ret, i; - void *out; - - out = kvzalloc(sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); - if (ret) - goto free; - - for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - cnts->offsets[i + offset])); -free: - kvfree(out); - return ret; -} - -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u8 port_num, int index) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); - struct mlx5_core_dev *mdev; - int ret, num_counters; - u8 mdev_port_num; - - if (!stats) - return -EINVAL; - - num_counters = cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters; - - /* q_counters are per IB device, query the master mdev */ - ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); - if (ret) - return ret; - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); - if (ret) - return ret; - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); - if (!mdev) { - /* If port is not affiliated yet, its in down state - * which doesn't have any counters yet, so it would be - * zero. So no need to read from the HCA. - */ - goto done; - } - ret = mlx5_lag_query_cong_counters(dev->mdev, - stats->value + - cnts->num_q_counters, - cnts->num_cong_counters, - cnts->offsets + - cnts->num_q_counters); - - mlx5_ib_put_native_port_mdev(dev, port_num); - if (ret) - return ret; - } - -done: - return num_counters; -} - -static struct rdma_hw_stats * -mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return mlx5_ib_query_q_counters(dev->mdev, cnts, - counter->stats, counter->id); -} - -static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - - if (!counter->id) - return 0; - - MLX5_SET(dealloc_q_counter_in, in, opcode, - MLX5_CMD_OP_DEALLOC_Q_COUNTER); - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); - return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); -} - -static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - int err; - - if (!counter->id) { - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - - MLX5_SET(alloc_q_counter_in, in, opcode, - MLX5_CMD_OP_ALLOC_Q_COUNTER); - MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); - if (err) - return err; - counter->id = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); - } - - err = mlx5_ib_qp_set_counter(qp, counter); - if (err) - goto fail_set_counter; - - return 0; - -fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; - - return err; -} - -static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) -{ - return mlx5_ib_qp_set_counter(qp, NULL); -} - static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params) @@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); } -static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!dev->delay_drop.dir_debugfs) - return; - debugfs_remove_recursive(dev->delay_drop.dir_debugfs); - dev->delay_drop.dir_debugfs = NULL; -} - -static void cancel_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - cancel_work_sync(&dev->delay_drop.delay_drop_work); - delay_drop_debugfs_cleanup(dev); -} - static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = { .read = delay_drop_timeout_read, }; -static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev) -{ - struct dentry *root; - - if (!mlx5_debugfs_root) - return; - - root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); - dev->delay_drop.dir_debugfs = root; - - debugfs_create_atomic_t("num_timeout_events", 0400, root, - &dev->delay_drop.events_cnt); - debugfs_create_atomic_t("num_rqs", 0400, root, - &dev->delay_drop.rqs_cnt); - debugfs_create_file("timeout", 0600, root, &dev->delay_drop, - &fops_delay_drop_timeout); -} - -static void init_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - mutex_init(&dev->delay_drop.lock); - dev->delay_drop.dev = dev; - dev->delay_drop.activate = false; - dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; - INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); - atomic_set(&dev->delay_drop.rqs_cnt, 0); - atomic_set(&dev->delay_drop.events_cnt, 0); - - delay_drop_debugfs_init(dev); -} - static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { @@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_query_context, + UVERBS_OBJECT_DEVICE, + UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp, + dump_fill_mkey), + UA_MANDATORY)); + static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), UAPI_DEF_CHAIN(mlx5_ib_qos_defs), + UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), {} }; -static int mlx5_ib_read_counters(struct ib_counters *counters, - struct ib_counters_read_attr *read_attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - struct mlx5_read_counters_attr mread_attr = {}; - struct mlx5_ib_flow_counters_desc *desc; - int ret, i; - - mutex_lock(&mcounters->mcntrs_mutex); - if (mcounters->cntrs_max_index > read_attr->ncounters) { - ret = -EINVAL; - goto err_bound; - } - - mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), - GFP_KERNEL); - if (!mread_attr.out) { - ret = -ENOMEM; - goto err_bound; - } - - mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; - mread_attr.flags = read_attr->flags; - ret = mcounters->read_counters(counters->device, &mread_attr); - if (ret) - goto err_read; - - /* do the pass over the counters data array to assign according to the - * descriptions and indexing pairs - */ - desc = mcounters->counters_data; - for (i = 0; i < mcounters->ncounters; i++) - read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; - -err_read: - kfree(mread_attr.out); -err_bound: - mutex_unlock(&mcounters->mcntrs_mutex); - return ret; -} - -static int mlx5_ib_destroy_counters(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - counters_clear_description(counters); - if (mcounters->hw_cntrs_hndl) - mlx5_fc_destroy(to_mdev(counters->device)->mdev, - mcounters->hw_cntrs_hndl); - - kfree(mcounters); - - return 0; -} - -static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters; - - mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); - if (!mcounters) - return ERR_PTR(-ENOMEM); - - mutex_init(&mcounters->mcntrs_mutex); - - return &mcounters->ibcntrs; -} - static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -6547,21 +4005,16 @@ err_mp: return -ENOMEM; } -static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_enable_driver(struct ib_device *dev) { - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); - - if (!dev->flow_db) - return -ENOMEM; - - mutex_init(&dev->flow_db->lock); + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; - return 0; -} + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); -static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) -{ - kfree(dev->flow_db); + return ret; } static const struct ib_device_ops mlx5_ib_dev_ops = { @@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .attach_mcast = mlx5_ib_mcg_attach, .check_mr_status = mlx5_ib_check_mr_status, .create_ah = mlx5_ib_create_ah, - .create_counters = mlx5_ib_create_counters, .create_cq = mlx5_ib_create_cq, - .create_flow = mlx5_ib_create_flow, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, .dealloc_pd = mlx5_ib_dealloc_pd, @@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .del_gid = mlx5_ib_del_gid, .dereg_mr = mlx5_ib_dereg_mr, .destroy_ah = mlx5_ib_destroy_ah, - .destroy_counters = mlx5_ib_destroy_counters, .destroy_cq = mlx5_ib_destroy_cq, - .destroy_flow = mlx5_ib_destroy_flow, - .destroy_flow_action = mlx5_ib_destroy_flow_action, .destroy_qp = mlx5_ib_destroy_qp, .destroy_srq = mlx5_ib_destroy_srq, .detach_mcast = mlx5_ib_mcg_detach, @@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .enable_driver = mlx5_ib_enable_driver, - .fill_res_entry = mlx5_ib_fill_res_entry, - .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, @@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_pkey = mlx5_ib_query_pkey, .query_qp = mlx5_ib_query_qp, .query_srq = mlx5_ib_query_srq, - .read_counters = mlx5_ib_read_counters, + .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; -static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { .rdma_netdev_get_params = mlx5_ib_rn_get_params, }; @@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = { static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { .alloc_xrcd = mlx5_ib_alloc_xrcd, .dealloc_xrcd = mlx5_ib_dealloc_xrcd, + + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd), }; static const struct ib_device_ops mlx5_ib_dev_dm_ops = { @@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) @@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .modify_wq = mlx5_ib_modify_wq, }; -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) -{ - u8 port_num; - - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); - - port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - /* Register only for native ports */ - return mlx5_add_netdev_notifier(dev, port_num); -} - -static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) -{ - u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - mlx5_remove_netdev_notifier(dev, port_num); -} - -static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_core_dev *mdev = dev->mdev; - enum rdma_link_layer ll; - int port_type_cap; - int err = 0; - - port_type_cap = MLX5_CAP_GEN(mdev, port_type); - ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - - if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev); - - return err; -} - -static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_stage_common_roce_cleanup(dev); -} - -static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num = 0; int err; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev); - if (err) + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); + + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + + /* Register only for native ports */ + err = mlx5_add_netdev_notifier(dev, port_num); + if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev)) + /* + * We don't enable ETH interface for + * 1. IB representors + * 2. User disabled ROCE through devlink interface + */ return err; err = mlx5_enable_eth(dev); @@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) return 0; cleanup: - mlx5_ib_stage_common_roce_cleanup(dev); - + mlx5_remove_netdev_notifier(dev, port_num); return err; } -static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - mlx5_disable_eth(dev); - mlx5_ib_stage_common_roce_cleanup(dev); - } -} - -static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) -{ - return create_dev_resources(&dev->devr); -} + if (!dev->is_rep) + mlx5_disable_eth(dev); -static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) -{ - destroy_dev_resources(&dev->devr); -} - -static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) -{ - return mlx5_ib_odp_init_one(dev); -} - -static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_odp_cleanup_one(dev); -} - -static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { - .alloc_hw_stats = mlx5_ib_alloc_hw_stats, - .get_hw_stats = mlx5_ib_get_hw_stats, - .counter_bind_qp = mlx5_ib_counter_bind_qp, - .counter_unbind_qp = mlx5_ib_counter_unbind_qp, - .counter_dealloc = mlx5_ib_counter_dealloc, - .counter_alloc_stats = mlx5_ib_counter_alloc_stats, - .counter_update_stats = mlx5_ib_counter_update_stats, -}; - -static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); - - return mlx5_ib_alloc_counters(dev); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + mlx5_remove_netdev_notifier(dev, port_num); } - - return 0; -} - -static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) - mlx5_ib_dealloc_counters(dev); } static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) @@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - destroy_umrc_res(dev); + int err; + + err = mlx5_mr_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + if (dev->umrc.qp) + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); + if (dev->umrc.cq) + ib_free_cq(dev->umrc.cq); + if (dev->umrc.pd) + ib_dealloc_pd(dev->umrc.pd); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ib_unregister_device(&dev->ib_dev); } +enum { + MAX_UMR_WR = 128, +}; + static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - return create_umr_res(dev); + struct ib_qp_init_attr *init_attr = NULL; + struct ib_qp_attr *attr = NULL; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto error_0; + } + + pd = ib_alloc_pd(&dev->ib_dev, 0); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + ret = PTR_ERR(pd); + goto error_0; + } + + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp, NULL); + dev->umrc.qp = NULL; + +error_3: + ib_free_cq(cq); + dev->umrc.cq = NULL; + +error_2: + ib_dealloc_pd(pd); + dev->umrc.pd = NULL; + +error_0: + kfree(attr); + kfree(init_attr); + return ret; } static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) { - init_delay_drop(dev); + struct dentry *root; + + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return 0; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (!mlx5_debugfs_root) + return 0; + + root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); + dev->delay_drop.dir_debugfs = root; + debugfs_create_atomic_t("num_timeout_events", 0400, root, + &dev->delay_drop.events_cnt); + debugfs_create_atomic_t("num_rqs", 0400, root, + &dev->delay_drop.rqs_cnt); + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, + &fops_delay_drop_timeout); return 0; } static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) { - cancel_delay_drop(dev); + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); + if (!dev->delay_drop.dir_debugfs) + return; + + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); + dev->delay_drop.dir_debugfs = NULL; } static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) @@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); } -static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) -{ - int uid; - - uid = mlx5_ib_devx_create(dev, false); - if (uid > 0) { - dev->devx_whitelist_uid = uid; - mlx5_ib_devx_init_event_table(dev); - } - - return 0; -} -static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) -{ - if (dev->devx_whitelist_uid) { - mlx5_ib_devx_cleanup_event_table(dev); - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); - } -} - -int mlx5_ib_enable_driver(struct ib_device *dev) -{ - struct mlx5_ib_dev *mdev = to_mdev(dev); - int ret; - - ret = mlx5_ib_test_wc(mdev); - mlx5_ib_dbg(mdev, "Write-Combining %s", - mdev->wc_support ? "supported" : "not supported"); - - return ret; -} - void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_roce_init, - mlx5_ib_stage_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_QP, mlx5_init_qp_table, mlx5_cleanup_qp_table), @@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, - mlx5_ib_stage_odp_init, - mlx5_ib_stage_odp_cleanup), + mlx5_ib_odp_init_one, + mlx5_ib_odp_cleanup_one), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), @@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, mlx5_ib_stage_delay_drop_init, mlx5_ib_stage_delay_drop_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_raw_eth_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_raw_eth_roce_init, - mlx5_ib_stage_raw_eth_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_QP, mlx5_init_qp_table, mlx5_cleanup_qp_table), @@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), @@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5dbe3eb0d9cb..5287fc868662 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. */ #ifndef MLX5_IB_H @@ -730,8 +703,8 @@ struct mlx5_ib_port_resources { struct mlx5_ib_resources { struct ib_cq *c0; - struct ib_xrcd *x0; - struct ib_xrcd *x1; + u32 xrcdn0; + u32 xrcdn1; struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; @@ -832,7 +805,7 @@ struct mlx5_ib_delay_drop { enum mlx5_ib_stages { MLX5_IB_STAGE_INIT, - MLX5_IB_STAGE_FLOW_DB, + MLX5_IB_STAGE_FS, MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, @@ -850,7 +823,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, - MLX5_IB_STAGE_CLASS_ATTR, + MLX5_IB_STAGE_RESTRACK, MLX5_IB_STAGE_MAX, }; @@ -1078,11 +1051,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) return container_of(core_qp, struct mlx5_ib_rwq, core_qp); } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) -{ - return container_of(mmkey, struct mlx5_ib_mr, mmkey); -} - static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -1210,7 +1178,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_sg, u32 max_num_meta_sg); @@ -1224,9 +1192,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in, struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); -struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1375,46 +1342,12 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); -int mlx5_ib_fill_stat_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; extern const struct uapi_definition mlx5_ib_qos_defs[]; +extern const struct uapi_definition mlx5_ib_std_types_defs[]; -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); -void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); -void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); -void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( - struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, u32 counter_id, - void *cmd_in, int inlen, int dest_id, int dest_type); -bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); -void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); -#else -static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev, - bool is_user) { return -EOPNOTSUPP; } -static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} -static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} -static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, - int *dest_type) -{ - return false; -} -static inline void -mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) -{ - return; -}; -#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -1423,15 +1356,6 @@ static inline void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static inline u8 convert_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ; -} - static inline int is_qp1(enum ib_qp_type qp_type) { return qp_type == MLX5_IB_QPT_HW_GSI; @@ -1518,9 +1442,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); - static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, bool do_modify_atomic, int access_flags) { @@ -1533,14 +1454,18 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return false; if (access_flags & IB_ACCESS_RELAXED_ORDERING && - (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) || - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))) + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return false; + + if (access_flags & IB_ACCESS_RELAXED_ORDERING && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; return true; } -int mlx5_ib_enable_driver(struct ib_device *dev); int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 44683073be0c..3e6f2f9c6655 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1961,7 +1961,7 @@ err_free: } struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 77dca1e05bba..cfd7efab114e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -816,6 +816,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + lockdep_assert_held(&mr->dev->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -929,11 +930,6 @@ next_mr: if (ret < 0) goto srcu_unlock; - /* - * When prefetching a page, page fault is generated - * in order to bring the page to the main memory. - * In the current flow, page faults are being counted. - */ mlx5_update_odp_stats(mr, faults, ret); npages += ret; @@ -1770,13 +1766,26 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; + int srcu_key; + int ret; u32 i; - for (i = 0; i < work->num_sge; ++i) - pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, - work->frags[i].length, &bytes_mapped, - work->pf_flags); + /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ + WARN_ON(!work->num_sge); + dev = work->frags[0].mr->dev; + /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ + srcu_key = srcu_read_lock(&dev->odp_srcu); + for (i = 0; i < work->num_sge; ++i) { + ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + if (ret <= 0) + continue; + mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); + } + srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1832,6 +1841,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, &bytes_mapped, pf_flags); if (ret < 0) goto out; + mlx5_update_odp_stats(mr, prefetch, ret); } ret = 0; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1225b8d77510..59fce5fac7a3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -38,6 +38,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_ib.h" #include "ib_rep.h" +#include "counters.h" #include "cmd.h" #include "qp.h" #include "wr.h" @@ -2031,15 +2032,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_XRC_INI: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); } else { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } } @@ -2178,11 +2179,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, no_sq, 1); if (attr->srq) { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(attr->srq)->msrq.srqn); } else { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } @@ -3554,7 +3555,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; - sq_state = MLX5_SQC_STATE_RDY; + sq_state = MLX5_SQC_STATE_RST; break; case MLX5_CMD_OP_2ERR_QP: rq_state = MLX5_RQC_STATE_ERR; @@ -3566,13 +3567,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, break; case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: - if (raw_qp_param->set_mask == - MLX5_RAW_QP_RATE_LIMIT) { - modify_rq = 0; - sq_state = sq->state; - } else { - return raw_qp_param->set_mask ? -EINVAL : 0; - } + if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT) + return -EINVAL; + + modify_rq = 0; + sq_state = MLX5_SQC_STATE_RDY; break; case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: @@ -4114,9 +4113,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_dev *dev = to_mdev(ibqp->device); enum ib_qp_state cur_state, new_state; - int err = 0; int required = IB_QP_STATE; void *dctc; + int err; if (!(attr_mask & IB_QP_STATE)) return -EINVAL; @@ -4208,11 +4207,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state); return -EINVAL; } - if (err) - qp->state = IB_QPS_ERR; - else - qp->state = new_state; - return err; + + qp->state = new_state; + return 0; } int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -4450,7 +4447,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, [MLX5_SQ_STATE_NA] = IB_QPS_RESET, }, [MLX5_RQC_STATE_RDY] = { - [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, @@ -4462,7 +4459,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, [MLX5_SQ_STATE_NA] = IB_QPS_ERR, }, [MLX5_RQ_STATE_NA] = { - [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, @@ -4708,41 +4705,23 @@ out: return err; } -struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata) +int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_xrcd *xrcd; - int err; + struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd); if (!MLX5_CAP_GEN(dev->mdev, xrc)) - return ERR_PTR(-ENOSYS); - - xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); - if (!xrcd) - return ERR_PTR(-ENOMEM); - - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); - if (err) { - kfree(xrcd); - return ERR_PTR(-ENOMEM); - } + return -EOPNOTSUPP; - return &xrcd->ibxrcd; + return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); - if (err) - mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); - - kfree(xrcd); - return 0; + mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index 82ea2b94dfa6..ba899df44c5b 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -43,4 +43,5 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res); int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); +int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); #endif /* _MLX5_IB_QP_H */ diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 8f6c04f12531..887270dd3ce2 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -1,17 +1,85 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved. */ #include <uapi/rdma/rdma_netlink.h> +#include <linux/mlx5/rsc_dump.h> #include <rdma/ib_umem_odp.h> #include <rdma/restrack.h> #include "mlx5_ib.h" +#include "restrack.h" -static int fill_stat_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +#define MAX_DUMP_SIZE 1024 + +static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type, + int index, void *data, int *data_len) +{ + struct mlx5_core_dev *mdev = dev; + struct mlx5_rsc_dump_cmd *cmd; + struct mlx5_rsc_key key = {}; + struct page *page; + int offset = 0; + int err = 0; + int cmd_err; + int size; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.size = PAGE_SIZE; + key.rsc = type; + key.index1 = index; + key.num_of_obj1 = 1; + + cmd = mlx5_rsc_dump_cmd_create(mdev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) { + err = cmd_err; + goto destroy_cmd; + } + memcpy(data + offset, page_address(page), size); + offset += size; + } while (cmd_err > 0); + *data_len = offset; + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + return err; +} + +static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev, + enum mlx5_sgmt_type type, u32 key) +{ + int len = 0; + void *data; + int err; + + data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = dump_rsc(dev->mdev, type, key, data, &len); + if (err) + goto out; + + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); +out: + kfree(data); + return err; +} + +static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -31,6 +99,9 @@ static int fill_stat_mr_entry(struct sk_buff *msg, msg, "page_invalidations", atomic64_read(&mr->odp_stats.invalidations))) goto err_table; + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", + atomic64_read(&mr->odp_stats.prefetch))) + goto err_table; nla_nest_end(msg, table_attr); return 0; @@ -41,10 +112,16 @@ err: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + + return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + mlx5_mkey_to_idx(mr->mmkey.key)); +} + +static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -71,20 +148,32 @@ err: return -EMSGSIZE; } -int mlx5_ib_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) { - if (res->type == RDMA_RESTRACK_MR) - return fill_res_mr_entry(msg, res); + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); - return 0; + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn); } -int mlx5_ib_fill_stat_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) { - if (res->type == RDMA_RESTRACK_MR) - return fill_stat_mr_entry(msg, res); + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP, + ibqp->qp_num); +} +static const struct ib_device_ops restrack_ops = { + .fill_res_cq_entry_raw = fill_res_cq_entry_raw, + .fill_res_mr_entry = fill_res_mr_entry, + .fill_res_mr_entry_raw = fill_res_mr_entry_raw, + .fill_res_qp_entry_raw = fill_res_qp_entry_raw, + .fill_stat_mr_entry = fill_stat_mr_entry, +}; + +int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev) +{ + ib_set_device_ops(&dev->ib_dev, &restrack_ops); return 0; } diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h new file mode 100644 index 000000000000..e8d81270f1b6 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved. + */ + +#ifndef _MLX5_IB_RESTRACK_H +#define _MLX5_IB_RESTRACK_H + +#include "mlx5_ib.h" + +int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev); + +#endif /* _MLX5_IB_RESTRACK_H */ diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 6d1ff13d2283..7e10cbcb6d5c 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -274,10 +274,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, if (srq->wq_sig) in.flags |= MLX5_SRQ_FLAG_WQ_SIG; - if (init_attr->srq_type == IB_SRQT_XRC) + if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd) in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; else - in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + in.xrcd = dev->devr.xrcdn0; if (init_attr->srq_type == IB_SRQT_TM) { in.tm_log_list_size = diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c new file mode 100644 index 000000000000..16145fda68d0 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE); + struct mlx5_ib_pd *mpd = to_mpd(pd); + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN, + &mpd->pdn, sizeof(mpd->pdn)); +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_PD_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_pd, + UVERBS_OBJECT_PD, + &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY)); + +const struct uapi_definition mlx5_ib_std_types_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_PD, + &mlx5_ib_pd), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index bc35dbe4855b..43880973a512 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -263,7 +263,9 @@ static __be64 get_umr_update_translation_mask(void) return cpu_to_be64(result); } -static __be64 get_umr_update_access_mask(int atomic) +static __be64 get_umr_update_access_mask(int atomic, + int relaxed_ordering_write, + int relaxed_ordering_read) { u64 result; @@ -275,6 +277,12 @@ static __be64 get_umr_update_access_mask(int atomic) if (atomic) result |= MLX5_MKEY_MASK_A; + if (relaxed_ordering_write) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; + + if (relaxed_ordering_read) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; + return cpu_to_be64(result); } @@ -289,17 +297,28 @@ static __be64 get_umr_update_pd_mask(void) static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) { - if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || - (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return -EPERM; + + if (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + return -EPERM; + return 0; } static int set_reg_umr_segment(struct mlx5_ib_dev *dev, struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr, int atomic) + const struct ib_send_wr *wr) { const struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -325,7 +344,10 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev, if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) umr->mkey_mask |= get_umr_update_translation_mask(); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask(atomic); + umr->mkey_mask |= get_umr_update_access_mask( + !!(MLX5_CAP_GEN(dev->mdev, atomic)), + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); umr->mkey_mask |= get_umr_update_pd_mask(); } if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) @@ -383,20 +405,31 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - seg->status = MLX5_MKEY_STATUS_FREE; + MLX5_SET(mkc, seg, free, 1); + + MLX5_SET(mkc, seg, a, + !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, seg, rw, + !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, seg, lr, 1); + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - seg->flags = convert_access(umrwr->access_flags); if (umrwr->pd) - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && !umrwr->length) - seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); + MLX5_SET(mkc, seg, length64, 1); - seg->start_addr = cpu_to_be64(umrwr->virt_addr); - seg->len = cpu_to_be64(umrwr->length); - seg->log2_page_size = umrwr->page_shift; - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | - mlx5_mkey_variant(umrwr->mkey)); + MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); + MLX5_SET64(mkc, seg, len, umrwr->length); + MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); + MLX5_SET(mkc, seg, qpn, 0xffffff); + MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); } static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, @@ -1224,8 +1257,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, *seg, wr, - !!(MLX5_CAP_GEN(dev->mdev, atomic))); + err = set_reg_umr_segment(dev, *seg, wr); if (unlikely(err)) goto out; *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index d11c74390a12..6cdbec13756a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2901,7 +2901,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) } struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { int status; struct ocrdma_mr *mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 3a5010881be5..df8e3b923a44 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -101,7 +101,7 @@ struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index b1de8d608e4d..d85f992bac29 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -110,7 +110,6 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = 1; immutable->gid_tbl_len = 1; immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; immutable->max_mad_size = 0; @@ -179,6 +178,7 @@ static int qedr_iw_register_device(struct qedr_dev *dev) static const struct ib_device_ops qedr_roce_dev_ops = { .get_port_immutable = qedr_roce_port_immutable, + .query_pkey = qedr_query_pkey, }; static void qedr_roce_register_device(struct qedr_dev *dev) @@ -221,7 +221,6 @@ static const struct ib_device_ops qedr_dev_ops = { .post_srq_recv = qedr_post_srq_recv, .process_mad = qedr_process_mad, .query_device = qedr_query_device, - .query_pkey = qedr_query_pkey, .query_port = qedr_query_port, .query_qp = qedr_query_qp, .query_srq = qedr_query_srq, diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index fdf90ecb2699..460292179b32 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -235,6 +235,7 @@ struct qedr_ucontext { u32 dpi_size; u16 dpi; bool db_rec; + u8 edpm_mode; }; union db_prod32 { @@ -344,10 +345,10 @@ struct qedr_srq_hwq_info { u32 wqe_prod; u32 sge_prod; u32 wr_prod_cnt; - u32 wr_cons_cnt; + atomic_t wr_cons_cnt; u32 num_elems; - u32 *virt_prod_pair_addr; + struct rdma_srq_producers *virt_prod_pair_addr; dma_addr_t phy_prod_pair_addr; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f5aa85a5377c..4ce4e2eef6cc 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -239,7 +239,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; - attr->pkey_tbl_len = 1; } else { attr->gid_tbl_len = QEDR_MAX_SGID; attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; @@ -275,7 +274,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) DP_ERR(dev, "Problem copying data from user space\n"); return -EFAULT; } - + ctx->edpm_mode = !!(ureq.context_flags & + QEDR_ALLOC_UCTX_EDPM_MODE); ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); } @@ -316,11 +316,15 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY; else uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED | - QEDR_DPM_TYPE_ROCE_LEGACY; + QEDR_DPM_TYPE_ROCE_LEGACY | + QEDR_DPM_TYPE_ROCE_EDPM_MODE; - uresp.dpm_flags |= QEDR_DPM_SIZES_SET; - uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; - uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) { + uresp.dpm_flags |= QEDR_DPM_SIZES_SET; + uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; + uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE; + } uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; @@ -1754,7 +1758,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = NULL; + struct qedr_ucontext *ctx = pd ? pd->uctx : NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; @@ -1792,6 +1796,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } + if (ctx) + SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode); + qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); @@ -3004,7 +3011,7 @@ err0: } struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct qedr_mr *mr; @@ -3687,7 +3694,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) * count and consumer count and subtract it from max * work request supported so that we get elements left. */ - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); return hw_srq->max_wr - used; } @@ -3702,7 +3709,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, unsigned long flags; int status = 0; u32 num_sge; - u32 offset; spin_lock_irqsave(&srq->lock, flags); @@ -3715,7 +3721,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (!qedr_srq_elem_left(hw_srq) || wr->num_sge > srq->hw_srq.max_sges) { DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + hw_srq->wr_prod_cnt, + atomic_read(&hw_srq->wr_cons_cnt), wr->num_sge, srq->hw_srq.max_sges); status = -ENOMEM; *bad_wr = wr; @@ -3749,22 +3756,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, hw_srq->sge_prod++; } - /* Flush WQE and SGE information before + /* Update WQE and SGE information before * updating producer. */ - wmb(); + dma_wmb(); /* SRQ producer is 8 bytes. Need to update SGE producer index * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; - offset = offsetof(struct rdma_srq_producers, wqe_prod); - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = - hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + /* Make sure sge producer is updated first */ + dma_wmb(); + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; - /* Flush producer after updating it. */ - wmb(); wr = wr->next; } @@ -4183,7 +4188,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, } else { __process_resp_one(dev, qp, cq, wc, resp, wr_id); } - srq->hw_srq.wr_cons_cnt++; + atomic_inc(&srq->hw_srq.wr_cons_cnt); return 1; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 5e02387e068d..39dd6286ba39 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -84,7 +84,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, const struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 7875883621f4..398c4c00b932 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -214,7 +214,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, if (!flow) return ERR_PTR(-ENOMEM); - tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); + tlv = dma_alloc_coherent(&pdev->dev, tlv_size, &tlv_pa, GFP_ATOMIC); if (!tlv) { usnic_err("Failed to allocate memory\n"); status = -ENOMEM; @@ -258,7 +258,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, out_free_tlv: spin_unlock(&ufdev->lock); - pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); + dma_free_coherent(&pdev->dev, tlv_size, tlv, tlv_pa); if (!status) return flow; out_free_flow: diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index b039f1f00e05..77a010e68208 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -202,7 +202,7 @@ err_umem: * @return: ib_mr pointer on success, otherwise returns an errno. */ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_user_mr *mr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 267702226f10..699b20849a7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -406,7 +406,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, |