diff options
author | Saeed Mahameed <saeedm@mellanox.com> | 2018-12-10 15:43:47 -0800 |
---|---|---|
committer | Saeed Mahameed <saeedm@mellanox.com> | 2018-12-10 15:50:50 -0800 |
commit | 2f62747c77e2e5a8acb720aaec9ee4860d55118f (patch) | |
tree | ec4b02de31627ae7ff4850fe519cefa86d97d028 /drivers/infiniband | |
parent | d8ed257f313f64e9835e61d1365dea95a0a1c9c6 (diff) | |
parent | 6c22a11957f46ca7e9b8db20ac7c6b05441c55ed (diff) | |
download | linux-2f62747c77e2e5a8acb720aaec9ee4860d55118f.tar.bz2 |
Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
mlx5-next shared branch with rdma subtree to avoid mlx5 rdma v.s. netdev
conflicts.
Highlights:
1) RDMA ODP (On Demand Paging) improvements and moving ODP logic to
mlx5 RDMA driver
2) Improved mlx5 core driver and device events handling and provided API
for upper layers to subscribe to device events.
3) RDMA only code cleanup from mlx5 core
4) Add helper to get CQE opcode
5) Rework handling of port module events
6) shared mlx5_ifc.h updates to avoid conflicts
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/core/umem_odp.c | 14 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/Makefile | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 231 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 24 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 331 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.c | 49 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.h | 73 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq_cmd.c | 722 |
10 files changed, 1321 insertions, 143 deletions
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 676c1fd1119d..9608681224e6 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, flags, local_page_list, NULL, NULL); up_read(&owning_mm->mmap_sem); - if (npages < 0) + if (npages < 0) { + if (npages != -EAGAIN) + pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages); + else + pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages); break; + } bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); mutex_lock(&umem_odp->umem_mutex); @@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = ib_umem_odp_map_dma_single_page( umem_odp, k, local_page_list[j], access_mask, current_seq); - if (ret < 0) + if (ret < 0) { + if (ret != -EAGAIN) + pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); + else + pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); break; + } p = page_to_phys(local_page_list[j]); k++; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b8e4b15e2674..33f5adb14e4e 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ + srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ + cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7d769b5538b4..26ab9041f94a 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "mlx5_ib.h" +#include "srq.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { @@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { return cqe; } else { @@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(dev->mdev, - be32_to_cpu(cqe->srqn)); + msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); @@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } wc->byte_len = be32_to_cpu(cqe->byte_cnt); - switch (cqe->op_own >> 4) { + switch (get_cqe_opcode(cqe)) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; @@ -537,7 +537,7 @@ repoll: */ rmb(); - opcode = cqe64->op_own >> 4; + opcode = get_cqe_opcode(cqe64); if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { if (likely(cq->resize_buf)) { free_cq_buf(dev, &cq->buf); @@ -1295,7 +1295,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq) return -EINVAL; } - while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, (i + 1) & cq->resize_buf->nent); dcqe64 = dsize == 64 ? dcqe : dcqe + 64; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 584ff2ea7810..8a682d86d634 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -4,6 +4,7 @@ */ #include "ib_rep.h" +#include "srq.h" static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, @@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3569fda07e07..0eeefff09c1e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -60,6 +60,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "srq.h" #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> @@ -82,10 +83,13 @@ static char mlx5_version[] = struct mlx5_ib_event_work { struct work_struct work; - struct mlx5_core_dev *dev; - void *context; - enum mlx5_dev_event event; - unsigned long param; + union { + struct mlx5_ib_dev *dev; + struct mlx5_ib_multiport_info *mpi; + }; + bool is_slave; + unsigned int event; + void *param; }; enum { @@ -2669,11 +2673,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohs(ib_spec->gre.val.protocol)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.mask.key, sizeof(ib_spec->gre.mask.key)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.val.key, sizeof(ib_spec->gre.val.key)); break; @@ -4226,6 +4230,63 @@ static void delay_drop_handler(struct work_struct *work) mutex_unlock(&delay_drop->lock); } +static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + break; + default: /* do nothing */ + return; + } +} + +static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + u8 port = (eqe->data.port.port >> 4) & 0xf; + + ibev->element.port_num = port; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_LID: + ibev->event = IB_EVENT_LID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + ibev->event = IB_EVENT_PKEY_CHANGE; + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); + break; + + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + ibev->event = IB_EVENT_GID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + ibev->event = IB_EVENT_CLIENT_REREGISTER; + break; + default: + return -EINVAL; + } + + return 0; +} + static void mlx5_ib_handle_event(struct work_struct *_work) { struct mlx5_ib_event_work *work = @@ -4233,65 +4294,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = (u8)work->param; - if (mlx5_core_is_mp_slave(work->dev)) { - ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); + if (work->is_slave) { + ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); if (!ibdev) goto out; } else { - ibdev = work->context; + ibdev = work->dev; } switch (work->event) { case MLX5_DEV_EVENT_SYS_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + ibev.element.port_num = (u8)(unsigned long)work->param; fatal = true; break; - - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* In RoCE, port up/down events are handled in - * mlx5_netdev_event(). - */ - if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == - IB_LINK_LAYER_ETHERNET) + case MLX5_EVENT_TYPE_PORT_CHANGE: + if (handle_port_change(ibdev, work->param, &ibev)) goto out; - - ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; break; - - case MLX5_DEV_EVENT_LID_CHANGE: - ibev.event = IB_EVENT_LID_CHANGE; - break; - - case MLX5_DEV_EVENT_PKEY_CHANGE: - ibev.event = IB_EVENT_PKEY_CHANGE; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); - break; - - case MLX5_DEV_EVENT_GUID_CHANGE: - ibev.event = IB_EVENT_GID_CHANGE; - break; - - case MLX5_DEV_EVENT_CLIENT_REREG: - ibev.event = IB_EVENT_CLIENT_REREGISTER; - break; - case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: - schedule_work(&ibdev->delay_drop.delay_drop_work); - goto out; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + handle_general_event(ibdev, work->param, &ibev); + /* fall through */ default: goto out; } - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = port; + ibev.device = &ibdev->ib_dev; - if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { - mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num); goto out; } @@ -4304,22 +4337,43 @@ out: kfree(work); } -static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param) +static int mlx5_ib_event(struct notifier_block *nb, + unsigned long event, void *param) { struct mlx5_ib_event_work *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return; + return NOTIFY_DONE; INIT_WORK(&work->work, mlx5_ib_handle_event); - work->dev = dev; + work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events); + work->is_slave = false; work->param = param; - work->context = context; work->event = event; queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; +} + +static int mlx5_ib_event_slave_port(struct notifier_block *nb, + unsigned long event, void *param) +{ + struct mlx5_ib_event_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + + INIT_WORK(&work->work, mlx5_ib_handle_event); + work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events); + work->is_slave = true; + work->param = param; + work->event = event; + queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -5330,7 +5384,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); + return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector); } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5350,6 +5404,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, spin_unlock(&port->mp.mpi_lock); return; } + + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; + mpi->ibdev = NULL; spin_unlock(&port->mp.mpi_lock); @@ -5405,6 +5464,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, ibdev->port[port_num].mp.mpi = mpi; mpi->ibdev = ibdev; + mpi->mdev_events.notifier_call = NULL; spin_unlock(&ibdev->port[port_num].mp.mpi_lock); err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); @@ -5422,6 +5482,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, goto unbind; } + mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; + mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); + err = mlx5_ib_init_cong_debugfs(ibdev, port_num); if (err) goto unbind; @@ -5694,8 +5757,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = - dev->mdev->priv.eq_table.num_comp_vectors; + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = &mdev->pdev->dev; mutex_init(&dev->cap_mask_mutex); @@ -6034,6 +6096,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } +void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_odp_cleanup_one(dev); +} + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { @@ -6152,6 +6219,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_unregister_vport_reps(dev); } +static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) +{ + dev->mdev_events.notifier_call = mlx5_ib_event; + mlx5_notifier_register(dev->mdev, &dev->mdev_events); + return 0; +} + +static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); +} + +static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) +{ + int uid; + + uid = mlx5_ib_devx_create(dev); + if (uid > 0) + dev->devx_whitelist_uid = uid; + + return 0; +} +static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) +{ + if (dev->devx_whitelist_uid) + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -6163,8 +6258,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, profile->stage[stage].cleanup(dev); } - if (dev->devx_whitelist_uid) - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); ib_dealloc_device((struct ib_device *)dev); } @@ -6173,7 +6266,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, { int err; int i; - int uid; for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -6183,10 +6275,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - uid = mlx5_ib_devx_create(dev); - if (uid > 0) - dev->devx_whitelist_uid = uid; - dev->profile = profile; dev->ib_active = true; @@ -6214,12 +6302,18 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_roce_init, mlx5_ib_stage_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, mlx5_ib_stage_odp_init, - NULL), + mlx5_ib_stage_odp_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6238,6 +6332,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, NULL), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6265,9 +6362,15 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6388,10 +6491,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, - .event = mlx5_ib_event, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - .pfault = mlx5_ib_pfault, -#endif .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b651a7a6fde9..861b68f2e330 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,7 +41,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> @@ -50,6 +49,8 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include "srq.h" + #define mlx5_ib_dbg(_dev, format, arg...) \ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) @@ -774,7 +775,9 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, + MLX5_IB_STAGE_SRQ, MLX5_IB_STAGE_DEVICE_RESOURCES, + MLX5_IB_STAGE_DEVICE_NOTIFIER, MLX5_IB_STAGE_ODP, MLX5_IB_STAGE_COUNTERS, MLX5_IB_STAGE_CONG_DEBUGFS, @@ -782,6 +785,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, MLX5_IB_STAGE_SPECS, + MLX5_IB_STAGE_WHITELIST_UID, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, @@ -806,6 +810,7 @@ struct mlx5_ib_multiport_info { struct list_head list; struct mlx5_ib_dev *ibdev; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct completion unref_comp; u64 sys_image_guid; u32 mdev_refcnt; @@ -880,10 +885,20 @@ struct mlx5_ib_lb_state { bool enabled; }; +struct mlx5_ib_pf_eq { + struct mlx5_ib_dev *dev; + struct mlx5_eq *core; + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask @@ -902,6 +917,8 @@ struct mlx5_ib_dev { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; + struct mlx5_ib_pf_eq odp_pf_eq; + /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. @@ -927,6 +944,7 @@ struct mlx5_ib_dev { u64 sys_image_guid; struct mlx5_memic memic; u16 devx_whitelist_uid; + struct mlx5_srq_table srq_table; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1158,9 +1176,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -1175,6 +1192,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } +static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2cc3d69ab6f6..4ead8c0fff5a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -37,6 +37,46 @@ #include "mlx5_ib.h" #include "cmd.h" +#include <linux/mlx5/eq.h> + +/* Contains the details of a pagefault. */ +struct mlx5_pagefault { + u32 bytes_committed; + u32 token; + u8 event_subtype; + u8 type; + union { + /* Initiator or send message responder pagefault details. */ + struct { + /* Received packet size, only valid for responders. */ + u32 packet_size; + /* + * Number of resource holding WQE, depends on type. + */ + u32 wq_num; + /* + * WQE index. Refers to either the send queue or + * receive queue, according to event_subtype. + */ + u16 wqe_index; + } wqe; + /* RDMA responder pagefault details */ + struct { + u32 r_key; + /* + * Received packet size, minimal size page fault + * resolution required for forward progress. + */ + u32 packet_size; + u32 rdma_op_len; + u64 rdma_va; + } rdma; + }; + + struct mlx5_ib_pf_eq *eq; + struct work_struct work; +}; + #define MAX_PREFETCH_LEN (4*1024*1024U) /* Timeout in ms to wait for an active mmu notifier to complete when handling @@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? pfault->wqe.wq_num : pfault->token; - int ret = mlx5_core_page_fault_resume(dev->mdev, - pfault->token, - wq_num, - pfault->type, - error); - if (ret) - mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n", - wq_num); + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + int err; + + MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type); + MLX5_SET(page_fault_resume_in, in, token, pfault->token); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, error, !!error); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", + wq_num, err); } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, @@ -607,8 +653,8 @@ out: if (!wait_for_completion_timeout( &odp->notifier_completion, timeout)) { - mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n", - current_seq, odp->notifiers_seq); + mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", + current_seq, odp->notifiers_seq, odp->notifiers_count); } } else { /* The MR is being killed, kill the QP as well. */ @@ -1026,16 +1072,31 @@ invalid_transport_or_opcode: return 0; } -static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev, - u32 wq_num) +static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, + u32 wq_num, int pf_type) { - struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num); + enum mlx5_res_type res_type; - if (!mqp) { - mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num); + switch (pf_type) { + case MLX5_WQE_PF_TYPE_RMP: + res_type = MLX5_RES_SRQ; + break; + case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: + case MLX5_WQE_PF_TYPE_RESP: + case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: + res_type = MLX5_RES_QP; + break; + default: return NULL; } + return mlx5_core_res_hold(dev->mdev, wq_num, res_type); +} + +static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res; + return to_mibqp(mqp); } @@ -1049,18 +1110,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; + struct mlx5_core_rsc_common *res; struct mlx5_ib_qp *qp; + res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); + if (!res) { + mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num); + return; + } + + switch (res->res) { + case MLX5_RES_QP: + qp = res_to_qp(res); + break; + default: + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + goto resolve_page_fault; + } + buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num); - if (!qp) - goto resolve_page_fault; - ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, PAGE_SIZE, &qp->trans_qp.base); if (ret < 0) { @@ -1100,6 +1173,7 @@ resolve_page_fault: mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); + mlx5_core_res_put(res); free_page((unsigned long)buffer); } @@ -1178,10 +1252,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } } -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault) +static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - struct mlx5_ib_dev *dev = context; u8 event_subtype = pfault->event_subtype; switch (event_subtype) { @@ -1198,6 +1270,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, } } +static void mlx5_ib_eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_ib_pf_eq *eq = pfault->eq; + + mlx5_ib_pfault(eq->dev, pfault); + mempool_free(pfault, eq->pool); +} + +static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int cc = 0; + + while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->work); + break; + } + + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_ib_warn(eq->dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action); + queue_work(eq->wq, &pfault->work); + + cc = mlx5_eq_update_cc(eq->core, ++cc); + } + + mlx5_eq_update_ci(eq->core, cc, 1); +} + +static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_ib_pf_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->lock, flags)) { + mlx5_ib_eq_pf_process(eq); + spin_unlock_irqrestore(&eq->lock, flags); + } else { + schedule_work(&eq->work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Cheap workaround. + */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void mlx5_ib_eq_pf_action(struct work_struct *work) +{ + struct mlx5_ib_pf_eq *eq = + container_of(work, struct mlx5_ib_pf_eq, work); + + mempool_refill(eq->pool); + + spin_lock_irq(&eq->lock); + mlx5_ib_eq_pf_process(eq); + spin_unlock_irq(&eq->lock); +} + +enum { + MLX5_IB_NUM_PF_EQE = 0x1000, + MLX5_IB_NUM_PF_DRAIN = 64, +}; + +static int +mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eq_param param = {}; + int err; + + INIT_WORK(&eq->work, mlx5_ib_eq_pf_action); + spin_lock_init(&eq->lock); + eq->dev = dev; + + eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) + return -ENOMEM; + + eq->wq = alloc_workqueue("mlx5_ib_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_IB_NUM_PF_EQE, + .context = eq, + .handler = mlx5_ib_eq_pf_int + }; + eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); + if (IS_ERR(eq->core)) { + err = PTR_ERR(eq->core); + goto err_wq; + } + + return 0; +err_wq: + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} + +static int +mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + int err; + + err = mlx5_eq_destroy_generic(dev->mdev, eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; +} + void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) { if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) @@ -1226,7 +1495,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { - int ret; + int ret = 0; if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1236,7 +1505,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - return 0; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return ret; + + ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); + + return ret; +} + +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; + + mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); } int mlx5_ib_odp_init(void) @@ -1246,4 +1528,3 @@ int mlx5_ib_odp_init(void) return 0; } - diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index d012e7dbcc38..91dcd3918d96 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -1,46 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ #include <linux/module.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/slab.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> - #include "mlx5_ib.h" - -/* not supported currently */ -static int srq_signature; +#include "srq.h" static void *get_wqe(struct mlx5_ib_srq *srq, int n) { @@ -202,7 +171,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_in; } - srq->wq_sig = !!srq_signature; + srq->wq_sig = 0; in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && @@ -327,7 +296,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.pd = to_mpd(pd)->pdn; in.db_record = srq->db.dma; - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); @@ -351,7 +320,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_core: - mlx5_core_destroy_srq(dev->mdev, &srq->msrq); + mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: if (pd->uobject) @@ -381,7 +350,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; mutex_lock(&srq->mutex); - ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1); + ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1); mutex_unlock(&srq->mutex); if (ret) @@ -402,7 +371,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (!out) return -ENOMEM; - ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out); + ret = mlx5_cmd_query_srq(dev, &srq->msrq, out); if (ret) goto out_box; @@ -420,7 +389,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); - mlx5_core_destroy_srq(dev->mdev, &msrq->msrq); + mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h new file mode 100644 index 000000000000..75eb5839ae95 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. + */ + +#ifndef MLX5_IB_SRQ_H +#define MLX5_IB_SRQ_H + +enum { + MLX5_SRQ_FLAG_ERR = (1 << 0), + MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), +}; + +struct mlx5_srq_attr { + u32 type; + u32 flags; + u32 log_size; + u32 wqe_shift; + u32 log_page_size; + u32 wqe_cnt; + u32 srqn; + u32 xrcd; + u32 page_offset; + u32 cqn; + u32 pd; + u32 lwm; + u32 user_index; + u64 db_record; + __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; + u16 uid; +}; + +struct mlx5_ib_dev; + +struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ + u32 srqn; + int max; + size_t max_gs; + size_t max_avail_gather; + int wqe_shift; + void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); + + atomic_t refcount; + struct completion free; + u16 uid; +}; + +struct mlx5_srq_table { + struct notifier_block nb; + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out); +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn); + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev); +#endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c new file mode 100644 index 000000000000..7aaaffbd4afa --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_ib.h" +#include "srq.h" + +static int get_pas_size(struct mlx5_srq_attr *in) +{ + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); + + return rq_num_pas * sizeof(u64); +} + +static void set_wq(void *wq, struct mlx5_srq_attr *in) +{ + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); +} + +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} + +static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_srq_in, create_in, uid, in->uid); + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); + + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); + + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); + + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); +} + +static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); +} + +static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; + + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); + if (!srq_out) + return -ENOMEM; + + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; +} + +static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); + memcpy(pas, in->pas, pas_size); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *xrc_srqc; + int err; + + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + void *create_out = NULL; + void *create_in = NULL; + void *rmpc; + void *wq; + int pas_size; + int outlen; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + outlen = MLX5_ST_SZ_BYTES(create_rmp_out); + create_in = kvzalloc(inlen, GFP_KERNEL); + create_out = kvzalloc(outlen, GFP_KERNEL); + if (!create_in || !create_out) { + err = -ENOMEM; + goto out; + } + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + + MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); + if (!err) { + srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn); + srq->uid = in->uid; + } + +out: + kvfree(create_in); + kvfree(create_out); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) +{ + void *out = NULL; + void *in = NULL; + void *rmpc; + void *wq; + void *bitmask; + int outlen; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rmp_in); + outlen = MLX5_ST_SZ_BYTES(modify_rmp_out); + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + + err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + +out: + kvfree(in); + kvfree(out); + return err; +} + +static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 *rmp_out = NULL; + u32 *rmp_in = NULL; + void *rmpc; + int outlen; + int inlen; + int err; + + outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + inlen = MLX5_ST_SZ_BYTES(query_rmp_in); + + rmp_out = kvzalloc(outlen, GFP_KERNEL); + rmp_in = kvzalloc(inlen, GFP_KERNEL); + if (!rmp_out || !rmp_in) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen); + if (err) + goto out; + + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(rmp_out); + kvfree(rmp_in); + return err; +} + +static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + +static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + if (!dev->mdev->issi) + return create_srq_cmd(dev, srq, in); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return create_xrc_srq_cmd(dev, srq, in); + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: + return create_rmp_cmd(dev, srq, in); + } +} + +static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + if (!dev->mdev->issi) + return destroy_srq_cmd(dev, srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return destroy_xrc_srq_cmd(dev, srq); + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: + return destroy_rmp_cmd(dev, srq); + } +} + +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + struct mlx5_srq_table *table = &dev->srq_table; + int err; + + switch (in->type) { + case IB_SRQT_XRC: + srq->common.res = MLX5_RES_XSRQ; + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: + srq->common.res = MLX5_RES_SRQ; + } + + err = create_srq_split(dev, srq, in); + if (err) + return err; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) + goto err_destroy_srq_split; + + return 0; + +err_destroy_srq_split: + destroy_srq_split(dev, srq); + + return err; +} + +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_srq_table *table = &dev->srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp || tmp != srq) + return -EINVAL; + + err = destroy_srq_split(dev, srq); + if (err) + return err; + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} + +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + if (!dev->mdev->issi) + return query_srq_cmd(dev, srq, out); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return query_xrc_srq_cmd(dev, srq, out); + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: + return query_rmp_cmd(dev, srq, out); + } +} + +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + if (!dev->mdev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return arm_xrc_srq_cmd(dev, srq, lwm); + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: + return arm_rmp_cmd(dev, srq, lwm); + } +} + +static int srq_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + struct mlx5_core_srq *srq; + struct mlx5_eqe *eqe; + u32 srqn; + + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) + return NOTIFY_OK; + + srq->event(srq, eqe->type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + + return NOTIFY_OK; +} + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev->mdev, &table->nb); + + return 0; +} + +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + mlx5_notifier_unregister(dev->mdev, &table->nb); +} |