From c74d90c11c05bdfd78f8e29ee96b8a6f23daea99 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Nov 2018 20:31:37 +0200 Subject: net/mlx5: Fix offsets of ifc reserved fields Fix wrong offsets of reserved fields in ifc file. Issues found using pahole. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 34e17e6f8942..6f64e814cc10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -349,7 +349,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_l3_tunnel_to_l2[0x1]; u8 reformat_l2_to_l3_tunnel[0x1]; u8 reformat_and_modify_action[0x1]; - u8 reserved_at_14[0xb]; + u8 reserved_at_15[0xb]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -586,7 +586,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 fdb_multi_path_to_table[0x1]; u8 reserved_at_1d[0x1]; u8 multi_fdb_encap[0x1]; - u8 reserved_at_1e[0x1e1]; + u8 reserved_at_1f[0x1e1]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; @@ -829,7 +829,7 @@ struct mlx5_ifc_vector_calc_cap_bits { struct mlx5_ifc_calc_op calc2; struct mlx5_ifc_calc_op calc3; - u8 reserved_at_e0[0x720]; + u8 reserved_at_c0[0x720]; }; enum { @@ -5567,7 +5567,7 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { struct mlx5_ifc_modify_nic_vport_field_select_bits { u8 reserved_at_0[0x12]; u8 affiliation[0x1]; - u8 reserved_at_e[0x1]; + u8 reserved_at_13[0x1]; u8 disable_uc_local_lb[0x1]; u8 disable_mc_local_lb[0x1]; u8 node_guid[0x1]; @@ -9028,7 +9028,7 @@ struct mlx5_ifc_dcbx_param_bits { u8 dcbx_cee_cap[0x1]; u8 dcbx_ieee_cap[0x1]; u8 dcbx_standby_cap[0x1]; - u8 reserved_at_0[0x5]; + u8 reserved_at_3[0x5]; u8 port_number[0x8]; u8 reserved_at_10[0xa]; u8 max_application_table_size[6]; -- cgit v1.2.3 From 698114968a22f6c0c9f42e983ba033cc36bb7217 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:08 +0200 Subject: net/mlx5: Release resource on error flow Fix reference counting leakage when the event handler aborts due to an unsupported event for the resource type. 
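The bug pattern is an acquire/release imbalance on an early-exit path: mlx5_get_rsc() takes a reference that the early return never dropped. A minimal sketch of the corrected flow (simplified from the diff below; the warning text is abbreviated):

        common = mlx5_get_rsc(dev, rsn);        /* takes a reference */
        if (!common)
                return;

        if (!is_event_type_allowed(rsn >> MLX5_USER_INDEX_LEN, event_type)) {
                mlx5_core_warn(dev, "event not allowed\n");
                goto out;       /* was 'return', which leaked the reference */
        }

        /* ... dispatch the event by resource type ... */
out:
        mlx5_core_put_rsc(common);      /* always drop the reference */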
Fixes: a14c2d4beee5 ("net/mlx5_core: Warn on unsupported events of QP/RQ/SQ") Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 91b8139a388d..690dc1dd9391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -132,7 +132,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", event_type, rsn); - return; + goto out; } switch (common->res) { @@ -150,7 +150,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } - +out: mlx5_core_put_rsc(common); } -- cgit v1.2.3
From 27e95603f4dfec470c6d26bea5174aa71b30e971 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:10 +0200 Subject: net/mlx5: Add interface to hold and release core resources Sometimes upper layers may want to prevent the destruction of a core resource for a period of time while work on that resource is in progress. Add an API to support this. Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 16 ++++++++++++++++ include/linux/mlx5/qp.h | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 690dc1dd9391..cba4a435043a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -670,3 +670,19 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type) +{ + u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + + return mlx5_get_rsc(dev, rsn); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_hold); + +void mlx5_core_res_put(struct mlx5_core_rsc_common *res) +{ + mlx5_core_put_rsc(res); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_put); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fbe322c966bc..b26ea9077384 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -596,6 +596,11 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, int reset, void *out, int out_size); +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type); +void mlx5_core_res_put(struct mlx5_core_rsc_common *res); + static inline const char *mlx5_qp_type_str(int type) { switch (type) { -- cgit v1.2.3
From c99fefea2cc907c98e7f39b3571bb697c8d42106 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:11 +0200 Subject: net/mlx5: Enumerate page fault types Give meaningful names to the types of WQE page faults.
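These names let the WQE page-fault path map a fault to the resource type that owns the faulting WQE. Roughly how odp_get_rsc(), added later in this series, consumes them together with the new hold/release API (dev, wq_num and pf_type come from the page-fault event context):

        enum mlx5_res_type res_type;

        switch (pf_type) {
        case MLX5_WQE_PF_TYPE_RMP:
                res_type = MLX5_RES_SRQ;
                break;
        case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
        case MLX5_WQE_PF_TYPE_RESP:
        case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
                res_type = MLX5_RES_QP;
                break;
        default:
                return NULL;
        }
        /* hold the resource so it cannot be destroyed while handled */
        return mlx5_core_res_hold(dev->mdev, wq_num, res_type);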
Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b4c0457fbebd..e326524bafcc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -212,6 +212,13 @@ enum { MLX5_PFAULT_SUBTYPE_RDMA = 1, }; +enum wqe_page_fault_type { + MLX5_WQE_PF_TYPE_RMP = 0, + MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE = 1, + MLX5_WQE_PF_TYPE_RESP = 2, + MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC = 3, +}; + enum { MLX5_PERM_LOCAL_READ = 1 << 2, MLX5_PERM_LOCAL_WRITE = 1 << 3, -- cgit v1.2.3
From 032080ab43ac02cf7446b3954ace7ccce1abcad6 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:12 +0200 Subject: IB/mlx5: Lock QP during page fault handling When a page fault event for a WQE arrives, the event data contains the resource (e.g. QP) number, which will later be used by the page fault handler to retrieve the resource. Meanwhile, another context can destroy the resource and cause a use-after-free. To avoid that, take a reference on the resource when the handler starts and release it when it ends. Page fault events for RDMA operations don't need to be protected because the driver doesn't need to access the QP in the page fault handler. Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling") Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/odp.c | 46 ++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b04eb6775326..abce55b8b9ba 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1016,16 +1016,31 @@ invalid_transport_or_opcode: return 0; } -static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev, - u32 wq_num) +static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, + u32 wq_num, int pf_type) { - struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num); + enum mlx5_res_type res_type; - if (!mqp) { - mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num); + switch (pf_type) { + case MLX5_WQE_PF_TYPE_RMP: + res_type = MLX5_RES_SRQ; + break; + case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: + case MLX5_WQE_PF_TYPE_RESP: + case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: + res_type = MLX5_RES_QP; + break; + default: return NULL; } + return mlx5_core_res_hold(dev->mdev, wq_num, res_type); +} + +static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res; + return to_mibqp(mqp); } @@ -1039,18 +1054,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; + struct mlx5_core_rsc_common *res; struct mlx5_ib_qp *qp; + res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); + if (!res) { + mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num); + return; + } + + switch (res->res) { + case MLX5_RES_QP: + qp = res_to_qp(res); + break; + default: + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + goto resolve_page_fault; + } + + buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num); - if (!qp) - goto
resolve_page_fault; - ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, PAGE_SIZE, &qp->trans_qp.base); if (ret < 0) { @@ -1090,6 +1117,7 @@ resolve_page_fault: mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); + mlx5_core_res_put(res); free_page((unsigned long)buffer); } -- cgit v1.2.3
From ef90c5e9757d5abc80b947cbcaacc43d1e916488 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:13 +0200 Subject: net/mlx5: Return success for PAGE_FAULT_RESUME in internal error state When the device is in the internal error state, the command interface isn't accessible and the driver decides which commands to fail and which to pass. Move the PAGE_FAULT_RESUME command to the pass list in order to avoid redundant failure messages. Fixes: 89d44f0a6c73 ("net/mlx5_core: Add pci error handlers to mlx5_core driver") Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a5a0823e5ada..7b18aff955f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -313,6 +313,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -326,7 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_MKEY: case MLX5_CMD_OP_QUERY_MKEY: case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: - case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_CREATE_EQ: case MLX5_CMD_OP_QUERY_EQ: case MLX5_CMD_OP_GEN_EQE: -- cgit v1.2.3
From 90290db7669ba680b37b7006cbf6e5cee6cba779 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:14 +0200 Subject: net/mlx5: Use multi threaded workqueue for page fault handling Page fault events are processed in a workqueue context. Since each QP can have up to two concurrent unrelated page-faults, one for the requester and one for the responder, page-fault handling can be done in parallel. Achieve this by changing the workqueue to be multi-threaded. The number of threads is the same as the number of command interface channels to avoid command interface bottlenecks. In addition to using multiple threads, change the workqueue flags to give it high priority. A stress benchmark shows that before this change 85% of page faults waited in the queue for 8 seconds or more, while after it 98% of page faults waited in the queue for 64 milliseconds or less.
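The mechanical change, shown in the diff below, swaps an ordered (strictly single-threaded) workqueue for an unbound, high-priority one; the max_active argument (here MLX5_NUM_CMD_EQE) bounds how many page-fault work items may run concurrently:

        /* before: one work item at a time */
        wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);

        /* after: up to MLX5_NUM_CMD_EQE concurrent high-priority items */
        wq = alloc_workqueue(name,
                             WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
                             MLX5_NUM_CMD_EQE);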
Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling") Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c1e1a16a9b07..aeab0c4f60f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -356,8 +356,9 @@ static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) spin_lock_init(&pf_ctx->lock); INIT_WORK(&pf_ctx->work, eq_pf_action); - pf_ctx->wq = alloc_ordered_workqueue(name, - WQ_MEM_RECLAIM); + pf_ctx->wq = alloc_workqueue(name, + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); if (!pf_ctx->wq) return -ENOMEM; -- cgit v1.2.3
From b02394aa75e3942bea8dac6efc7f1a179fbe011f Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:17 +0200 Subject: IB/mlx5: Improve ODP debugging messages Add and modify debug messages in ODP-related error flows. In that context, the EAGAIN return code is considered less severe, and its print level is set to debug instead of warn. Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 14 ++++++++++++-- drivers/infiniband/hw/mlx5/odp.c | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 2b4c5e7dd5a1..e334480b56bf 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -655,8 +655,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, flags, local_page_list, NULL, NULL); up_read(&owning_mm->mmap_sem); - if (npages < 0) + if (npages < 0) { + if (npages != -EAGAIN) + pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages); + else + pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages); break; + } bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); mutex_lock(&umem_odp->umem_mutex); @@ -674,8 +679,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = ib_umem_odp_map_dma_single_page( umem_odp, k, local_page_list[j], access_mask, current_seq); - if (ret < 0) + if (ret < 0) { + if (ret != -EAGAIN) + pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); + else + pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); break; + } p = page_to_phys(local_page_list[j]); k++; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index abce55b8b9ba..7d784b40e017 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -607,8 +607,8 @@ out: if (!wait_for_completion_timeout( &odp->notifier_completion, timeout)) { - mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n", - current_seq, odp->notifiers_seq); + mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", + current_seq, odp->notifiers_seq, odp->notifiers_count); } } else { /* The MR is being killed, kill the QP as well. */ -- cgit v1.2.3
From 1e86ace4c140fd5a693e266c9b23409358f25381 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:31 -0800 Subject: net/mlx5: EQ, Use the right place to store/read IRQ affinity hint Currently the CPU affinity hint mask for completion EQs is stored and read from the wrong place. Since reading and storing are done at the same index there is no actual issue, but the internal irq_info for completion EQs starts at offset MLX5_EQ_VEC_COMP_BASE in the irq_info array. Change the code to use the correct offset to store and read the IRQ affinity hint. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 ++++++++------ include/linux/mlx5/driver.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1243edbedc9e..2839c30dd3a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1760,7 +1760,7 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) { - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); + return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28132c7dc05f..d5cea0a36e6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -640,18 +640,19 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); + priv->irq_info[vecidx].mask); if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; @@ -659,11 +660,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) { + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + int irq = pci_irq_vector(mdev->pdev, vecidx); irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); + free_cpumask_var(priv->irq_info[vecidx].mask); } static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa5963b5d38e..7d4ed995b4ce 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1309,7 +1309,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int
vector) { - return dev->priv.irq_info[vector].mask; + return dev->priv.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3
From 4de45c758636c37efd313589f91c739f613fbe7d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:32 -0800 Subject: net/mlx5: EQ, Remove unused fields and structures Some fields and structures are neither referenced nor used by the driver; remove them. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 11 ----------- include/linux/mlx5/driver.h | 3 --- 2 files changed, 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index aeab0c4f60f4..fd5926daa0a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -78,17 +78,6 @@ enum { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -struct map_eq_in { - u64 mask; - u32 reserved; - u32 unmap_eqn; -}; - -struct cre_des_eq { - u8 reserved[15]; - u8 eqn; -}; - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7d4ed995b4ce..15cf6727a62d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -398,7 +398,6 @@ struct mlx5_eq { unsigned int irqn; u8 eqn; int nent; - u64 mask; struct list_head list; int index; struct mlx5_rsc_debug *dbg; @@ -478,8 +477,6 @@ struct mlx5_core_srq { }; struct mlx5_eq_table { - void __iomem *update_ci; - void __iomem *update_arm_ci; struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; -- cgit v1.2.3
From 2883f352571b9b830561ca21b8a666936366a120 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:33 -0800 Subject: net/mlx5: EQ, No need to store eq index as a field eq->index is used only for completion EQs, where it is assigned the completion EQ index. Since it is consulted only when traversing the completion EQs list and can be calculated dynamically there, remove the eq->index field.
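This works because completion EQs are appended with list_add_tail() at creation time, so list position equals completion vector index; the lookup in the diff below reduces to:

        int i = 0;

        list_for_each_entry(eq, &table->comp_eqs_list, list)
                if (i++ == vector)
                        return eq;      /* list position == vector index */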
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++-- include/linux/mlx5/driver.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d5cea0a36e6a..f5e6d375a8cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -702,10 +702,11 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; int err = -ENOENT; + int i = 0; spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - if (eq->index == vector) { + if (i++ == vector) { *eqn = eq->eqn; *irqn = eq->irqn; err = 0; @@ -797,7 +798,6 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - eq->index = i; spin_lock(&table->lock); list_add_tail(&eq->list, &table->comp_eqs_list); spin_unlock(&table->lock); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 15cf6727a62d..4b62d71825c1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -399,7 +399,6 @@ struct mlx5_eq { u8 eqn; int nent; struct list_head list; - int index; struct mlx5_rsc_debug *dbg; enum mlx5_eq_type type; union { -- cgit v1.2.3
From aaa553a64438640ee4e41a2c1027c3435a75c0e7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:34 -0800 Subject: net/mlx5: EQ, Remove redundant completion EQ list lock The completion EQs list is only modified on driver load/unload, so locking is not required; remove it.
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 +++-------------- include/linux/mlx5/driver.h | 3 --- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fd5926daa0a6..e75272503027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -810,8 +810,6 @@ int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; - spin_lock_init(&dev->priv.eq_table.lock); - err = mlx5_eq_debugfs_init(dev); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f5e6d375a8cc..f692c2a42130 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -704,7 +704,6 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int err = -ENOENT; int i = 0; - spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { *eqn = eq->eqn; @@ -713,7 +712,6 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, break; } } - spin_unlock(&table->lock); return err; } @@ -724,14 +722,11 @@ struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq; - spin_lock(&table->lock); - list_for_each_entry(eq, &table->comp_eqs_list, list) - if (eq->eqn == eqn) { - spin_unlock(&table->lock); + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->eqn == eqn) return eq; - } + } - spin_unlock(&table->lock); return ERR_PTR(-ENOENT); } @@ -747,17 +742,13 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) dev->rmap = NULL; } #endif - spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); - spin_unlock(&table->lock); if (mlx5_destroy_unmap_eq(dev, eq)) mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); kfree(eq); - spin_lock(&table->lock); } - spin_unlock(&table->lock); } static int alloc_comp_eqs(struct mlx5_core_dev *dev) @@ -798,9 +789,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - spin_lock(&table->lock); list_add_tail(&eq->list, &table->comp_eqs_list); - spin_unlock(&table->lock); } return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b62d71825c1..852e397c7624 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -484,9 +484,6 @@ struct mlx5_eq_table { struct mlx5_eq pfault_eq; #endif int num_comp_vectors; - /* protect EQs list - */ - spinlock_t lock; }; struct mlx5_uars_page { -- cgit v1.2.3 From ca828cb4686f1aece8d30541e11b8e21de1a7b0e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:35 -0800 Subject: net/mlx5: EQ, Move all EQ logic to eq.c Move completion EQs flows from main.c to eq.c, reasons: 1) It is where this logic belongs. 2) It will help centralize the EQ logic in one file for downstream refactoring, and future extensions/updates. 
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 176 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 179 +-------------------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + 3 files changed, 181 insertions(+), 176 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index e75272503027..4d79a4ccb758 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -820,6 +820,8 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev) mlx5_eq_debugfs_cleanup(dev); } +/* Async EQs */ + int mlx5_start_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; @@ -953,12 +955,186 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } +/* Completion EQs */ + +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); + + if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + priv->irq_info[vecidx].mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, vecidx); + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[vecidx].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + +void mlx5_free_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq, *n; + + mlx5_irq_clear_affinity_hints(dev); + +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + if (mlx5_destroy_unmap_eq(dev, eq)) + mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", + eq->eqn); + kfree(eq); + } +} + +int mlx5_alloc_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_eq *eq; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&table->comp_eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!dev->rmap) + return -ENOMEM; +#endif + for (i = 0; i < ncomp_vec; i++) { + int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + 
err = -ENOMEM; + goto clean; + } + +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, vecidx)); +#endif + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + err = mlx5_create_map_eq(dev, eq, vecidx, nent, 0, + name, MLX5_EQ_TYPE_COMP); + if (err) { + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ + list_add_tail(&eq->list, &table->comp_eqs_list); + } + + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto clean; + } + + return 0; + +clean: + mlx5_free_comp_eqs(dev); + return err; +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq, *n; + int err = -ENOENT; + int i = 0; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) { + *eqn = eq->eqn; + *irqn = eq->irqn; + err = 0; + break; + } + } + + return err; +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->eqn == eqn) + return eq; + } + + return ERR_PTR(-ENOENT); +} + /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq; + mlx5_irq_clear_affinity_hints(dev); + #ifdef CONFIG_RFS_ACCEL if (dev->rmap) { free_irq_cpu_rmap(dev->rmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f692c2a42130..244fec4b2ef2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -637,168 +637,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) return (u64)timer_l | (u64)timer_h1 << 32; } -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - - if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[vecidx].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[vecidx].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - 
mlx5_irq_clear_affinity_hint(mdev, i); -} - -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - int err = -ENOENT; - int i = 0; - - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - if (i++ == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; - err = 0; - break; - } - } - - return err; -} -EXPORT_SYMBOL(mlx5_vector2eqn); - -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; - - list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (eq->eqn == eqn) - return eq; - } - - - return ERR_PTR(-ENOENT); -} - -static void free_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - -#ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; - } -#endif - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - list_del(&eq->list); - if (mlx5_destroy_unmap_eq(dev, eq)) - mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", - eq->eqn); - kfree(eq); - } -} - -static int alloc_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; - struct mlx5_eq *eq; - int ncomp_vec; - int nent; - int err; - int i; - - INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; - nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!dev->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - eq = kzalloc(sizeof(*eq), GFP_KERNEL); - if (!eq) { - err = -ENOMEM; - goto clean; - } - -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + i)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, eq, - i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, MLX5_EQ_TYPE_COMP); - if (err) { - kfree(eq); - goto clean; - } - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - list_add_tail(&eq->list, &table->comp_eqs_list); - } - - return 0; - -clean: - free_comp_eqs(dev); - return err; -} - static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0}; @@ -1177,18 +1015,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fw_tracer; } - err = alloc_comp_eqs(dev); + err = mlx5_alloc_comp_eqs(dev); if (err) { dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); goto err_comp_eqs; } - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); @@ -1257,10 +1089,7 @@ err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: - free_comp_eqs(dev); + mlx5_free_comp_eqs(dev); err_comp_eqs: mlx5_fw_tracer_cleanup(dev->tracer); @@ -1331,8 +1160,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); - mlx5_irq_clear_affinity_hints(dev); - free_comp_eqs(dev); + mlx5_free_comp_eqs(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); @@ -1628,7 +1456,6 @@ succeed: * 
kexec. There is no need to cleanup the mlx5_core software * contexts. */ - mlx5_irq_clear_affinity_hints(dev); mlx5_core_eq_free_irqs(dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0594d0961cb3..22cff00faa5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -136,6 +136,8 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen); int mlx5_start_eqs(struct mlx5_core_dev *dev); void mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_alloc_comp_eqs(struct mlx5_core_dev *dev); +void mlx5_free_comp_eqs(struct mlx5_core_dev *dev); /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -- cgit v1.2.3 From c8e21b3b576b78fe1b07522aea046af2634a24e8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:36 -0800 Subject: net/mlx5: EQ, Create all EQs in one place Instead of creating the EQ table in three steps at driver load, - allocate irq vectors - allocate async EQs - allocate completion EQs Gather all of the procedures into one function in eq.c and call it from driver load. This will help us reduce the EQ and EQ table private structures visibility to eq.c in downstream refactoring. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 10 ++ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 121 ++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 81 ++------------ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 8 +- 4 files changed, 116 insertions(+), 104 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 90fabd612b6c..b76766fb6c67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -349,6 +349,16 @@ out: return param; } +static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {}; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, int index) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 4d79a4ccb758..44ccd4206104 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -822,7 +822,7 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -int mlx5_start_eqs(struct mlx5_core_dev *dev) +static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; @@ -914,7 +914,7 @@ err1: return err; } -void mlx5_stop_eqs(struct mlx5_core_dev *dev) +static void destroy_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; int err; @@ -945,19 +945,9 @@ void mlx5_stop_eqs(struct mlx5_core_dev *dev) err); } -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_eq_in)] = 
{0}; - - MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); - MLX5_SET(query_eq_in, in, eq_number, eq->eqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - /* Completion EQs */ -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; @@ -978,7 +968,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) return 0; } -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; @@ -988,13 +978,13 @@ static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) free_cpumask_var(priv->irq_info[vecidx].mask); } -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) { int err; int i; for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = mlx5_irq_set_affinity_hint(mdev, i); + err = set_comp_irq_affinity_hint(mdev, i); if (err) goto err_out; } @@ -1003,25 +993,25 @@ static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) err_out: for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); + clear_comp_irq_affinity_hint(mdev, i); return err; } -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) { int i; for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); + clear_comp_irq_affinity_hint(mdev, i); } -void mlx5_free_comp_eqs(struct mlx5_core_dev *dev) +static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; - mlx5_irq_clear_affinity_hints(dev); + clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL if (dev->rmap) { @@ -1038,7 +1028,7 @@ void mlx5_free_comp_eqs(struct mlx5_core_dev *dev) } } -int mlx5_alloc_comp_eqs(struct mlx5_core_dev *dev) +static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; char name[MLX5_MAX_IRQ_NAME]; @@ -1080,7 +1070,7 @@ int mlx5_alloc_comp_eqs(struct mlx5_core_dev *dev) list_add_tail(&eq->list, &table->comp_eqs_list); } - err = mlx5_irq_set_affinity_hints(dev); + err = set_comp_irq_affinity_hints(dev); if (err) { mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); goto clean; @@ -1089,7 +1079,7 @@ int mlx5_alloc_comp_eqs(struct mlx5_core_dev *dev) return 0; clean: - mlx5_free_comp_eqs(dev); + destroy_comp_eqs(dev); return err; } @@ -1133,7 +1123,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq; - mlx5_irq_clear_affinity_hints(dev); + clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL if (dev->rmap) { @@ -1153,3 +1143,84 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) #endif pci_free_irq_vectors(dev->pdev); } + +static int alloc_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = &priv->eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
+ MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); + if (!priv->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + + return 0; + +err_free_irq_info: + kfree(priv->irq_info); + return err; +} + +static void free_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + pci_free_irq_vectors(dev->pdev); + kfree(priv->irq_info); +} + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + int err; + + err = alloc_irq_vectors(dev); + if (err) { + mlx5_core_err(dev, "alloc irq vectors failed\n"); + return err; + } + + err = create_async_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create async EQs\n"); + goto err_async_eqs; + } + + err = create_comp_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create completion EQs\n"); + goto err_comp_eqs; + } + + return 0; +err_comp_eqs: + destroy_async_eqs(dev); +err_async_eqs: + free_irq_vectors(dev); + return err; +} + +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) +{ + destroy_comp_eqs(dev); + destroy_async_eqs(dev); + free_irq_vectors(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 244fec4b2ef2..21cc9bbc2563 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -319,51 +319,6 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = &priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
- MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(priv->irq_info); - return err; -} - -static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->irq_info); -} - struct mlx5_reg_host_endianness { u8 he; u8 rsvd[15]; @@ -990,23 +945,17 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } } - err = mlx5_alloc_irq_vectors(dev); - if (err) { - dev_err(&pdev->dev, "alloc irq vectors failed\n"); - goto err_cleanup_once; - } - dev->priv.uar = mlx5_get_uars_page(dev); if (IS_ERR(dev->priv.uar)) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); err = PTR_ERR(dev->priv.uar); - goto err_disable_msix; + goto err_get_uars; } - err = mlx5_start_eqs(dev); + err = mlx5_eq_table_create(dev); if (err) { - dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_put_uars; + dev_err(&pdev->dev, "Failed to create EQs\n"); + goto err_eq_table; } err = mlx5_fw_tracer_init(dev->tracer); @@ -1015,12 +964,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fw_tracer; } - err = mlx5_alloc_comp_eqs(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); - goto err_comp_eqs; - } - err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); @@ -1089,21 +1032,15 @@ err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: - mlx5_free_comp_eqs(dev); - -err_comp_eqs: mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); -err_put_uars: +err_eq_table: mlx5_put_uars_page(dev, priv->uar); -err_disable_msix: - mlx5_free_irq_vectors(dev); - -err_cleanup_once: +err_get_uars: if (boot) mlx5_cleanup_once(dev); @@ -1160,11 +1097,9 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); - mlx5_free_comp_eqs(dev); mlx5_fw_tracer_cleanup(dev->tracer); - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22cff00faa5a..3fa6d26875fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -132,12 +132,8 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen); -int mlx5_start_eqs(struct 
mlx5_core_dev *dev); -void mlx5_stop_eqs(struct mlx5_core_dev *dev); -int mlx5_alloc_comp_eqs(struct mlx5_core_dev *dev); -void mlx5_free_comp_eqs(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -- cgit v1.2.3 From d674a9aa434409826b2408609be493739e61e6f6 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:37 -0800 Subject: net/mlx5: EQ, irq_info and rmap belong to eq_table irq_info and rmap are EQ properties of the driver, and only needed for EQ objects, move them to the eq_table EQs database structure. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 40 ++++++++++++----------- include/linux/mlx5/driver.h | 10 +++--- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2839c30dd3a0..32ea47c28324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1760,7 +1760,7 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) { - return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); + return cpumask_first(priv->mdev->priv.eq_table.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, @@ -4960,7 +4960,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->rmap; + netdev->rx_cpu_rmap = mdev->priv.eq_table.rmap; #endif return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 44ccd4206104..70f62f10065e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -694,7 +694,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(priv->eq_table.irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); @@ -702,7 +702,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, - priv->irq_info[vecidx].name, eq); + priv->eq_table.irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -952,17 +952,18 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; - if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[vecidx].mask); + irq_info->mask); if 
(IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) + irq_set_affinity_hint(irq, irq_info->mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; @@ -973,9 +974,10 @@ static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[vecidx].mask); + free_cpumask_var(irq_info->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) @@ -1014,9 +1016,9 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { @@ -1042,8 +1044,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ncomp_vec = table->num_comp_vectors; nent = MLX5_COMP_EQ_SIZE; #ifdef CONFIG_RFS_ACCEL - dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!dev->rmap) + table->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!table->rmap) return -ENOMEM; #endif for (i = 0; i < ncomp_vec; i++) { @@ -1056,7 +1058,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) } #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, vecidx)); + irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, vecidx, nent, 0, @@ -1126,9 +1128,9 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif list_for_each_entry(eq, &table->comp_eqs_list, list) @@ -1160,8 +1162,8 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->irq_info) + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) return -ENOMEM; nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, @@ -1176,7 +1178,7 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) return 0; err_free_irq_info: - kfree(priv->irq_info); + kfree(table->irq_info); return err; } @@ -1185,7 +1187,7 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_priv *priv = &dev->priv; pci_free_irq_vectors(dev->pdev); - kfree(priv->irq_info); + kfree(priv->eq_table.irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 852e397c7624..dcc3f7aa8572 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -484,6 +484,10 @@ struct mlx5_eq_table { struct mlx5_eq pfault_eq; #endif int num_comp_vectors; + struct mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; struct mlx5_uars_page { @@ -640,7 +644,6 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; - struct mlx5_irq_info *irq_info; /* pages stuff */ struct workqueue_struct *pg_wq; @@ 
-851,9 +854,6 @@ struct mlx5_core_dev { } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; -#endif -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; @@ -1302,7 +1302,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - return dev->priv.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + return dev->priv.eq_table.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From f2f3df5501391bc784c8462dc97d989c2194fb74 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:38 -0800 Subject: net/mlx5: EQ, Privatize eq_table and friends Move unnecessary EQ table structures and declaration from the public include/linux/mlx5/driver.h into the private area of mlx5_core and into eq.c/eq.h. Introduce new mlx5 EQ APIs: mlx5_comp_vectors_count(dev); mlx5_comp_irq_get_affinity_mask(dev, vector); And use them from mlx5_ib or mlx5e netdevice instead of direct access to mlx5_core internal structures. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 5 +- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 5 +- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 102 ++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/health.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 77 ++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 15 --- include/linux/mlx5/driver.h | 87 +----------------- 12 files changed, 179 insertions(+), 135 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e9c428071df3..6fbc0cba1bac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5337,7 +5337,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); + return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector); } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5701,8 +5701,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = - dev->mdev->priv.eq_table.num_comp_vectors; + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = &mdev->pdev->dev; mutex_init(&dev->cap_mask_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 4b85abb5c9f7..6e55d2f37c6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -38,6 +38,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) @@ -124,7 +125,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, goto err_cmd; /* Add to async EQ 
CQ tree to recv async events */ - err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); if (err) goto err_cq_add; @@ -157,7 +158,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index b76766fb6c67..a11e22d0b0cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { QP_PID, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d7fbd5b6ac95..aea74856c702 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -178,8 +178,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } /* Use this function to get max num channels after netdev was created */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 32ea47c28324..c23caade31bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -49,6 +49,7 @@ #include "lib/clock.h" #include "en/port.h" #include "en/xdp.h" +#include "lib/eq.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -1758,11 +1759,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.eq_table.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1913,9 +1909,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); struct net_dim_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; @@ -4960,7 +4956,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->priv.eq_table.rmap; + netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev); #endif return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 70f62f10065e..32ce20221c44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -38,6 +38,7 @@ #include #endif #include "mlx5_core.h" +#include "lib/eq.h" #include "fpga/core.h" #include "eswitch.h" #include "lib/clock.h" @@ -65,6 +66,26 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_eq_table { + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; +#ifdef 
CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct mlx5_eq pfault_eq; +#endif + int num_comp_vectors; + struct mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ @@ -633,10 +654,11 @@ static void init_eq_buf(struct mlx5_eq *eq) } } -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type) +static int +mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, enum mlx5_eq_type type) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; @@ -694,7 +716,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->eq_table.irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); @@ -702,7 +724,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, - priv->eq_table.irq_info[vecidx].name, eq); + eq_table->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -746,7 +768,7 @@ err_buf: return err; } -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; @@ -806,25 +828,35 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return 0; } -int mlx5_eq_init(struct mlx5_core_dev *dev) +int mlx5_eq_table_init(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table; int err; + eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; + err = mlx5_eq_debugfs_init(dev); + if (err) + kvfree(eq_table); return err; } -void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { mlx5_eq_debugfs_cleanup(dev); + kvfree(dev->priv.eq_table); } /* Async EQs */ static int create_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; @@ -916,7 +948,7 @@ err1: static void destroy_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; int err; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -945,6 +977,11 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) err); } +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) +{ + return &dev->priv.eq_table->async_eq; +} + /* Completion EQs */ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -952,7 +989,7 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; if 
(!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -974,7 +1011,7 @@ static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; irq_set_affinity_hint(irq, NULL); free_cpumask_var(irq_info->mask); @@ -985,7 +1022,7 @@ static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) int err; int i; - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { err = set_comp_irq_affinity_hint(mdev, i); if (err) goto err_out; @@ -1004,13 +1041,13 @@ static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) { int i; - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) clear_comp_irq_affinity_hint(mdev, i); } static void destroy_comp_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq, *n; clear_comp_irqs_affinity_hints(dev); @@ -1032,7 +1069,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq *eq; int ncomp_vec; @@ -1088,7 +1125,7 @@ clean: int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq, *n; int err = -ENOENT; int i = 0; @@ -1106,9 +1143,32 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, } EXPORT_SYMBOL(mlx5_vector2eqn); +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->num_comp_vectors; +} +EXPORT_SYMBOL(mlx5_comp_vectors_count); + +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + /* TODO: consider irq_get_affinity_mask(irq) */ + return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; +} +EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq; list_for_each_entry(eq, &table->comp_eqs_list, list) { @@ -1122,7 +1182,7 @@ struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq; clear_comp_irqs_affinity_hints(dev); @@ -1149,7 +1209,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = &priv->eq_table; + struct mlx5_eq_table *table = priv->eq_table; int num_eqs = 
MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); @@ -1187,7 +1247,7 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_priv *priv = &dev->priv; pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table.irq_info); + kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d004957328f9..324606227b1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -38,6 +38,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "lib/eq.h" #define UPLINK_VPORT 0xFFFF diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 43118de8ee99..b5be6f0b9ed5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -38,6 +38,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 000000000000..48ee37797b3f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include + +#define MLX5_MAX_IRQ_NAME (32) + +enum { + MLX5_EQ_VEC_PAGES = 0, + MLX5_EQ_VEC_CMD = 1, + MLX5_EQ_VEC_ASYNC = 2, + MLX5_EQ_VEC_PFAULT = 3, + MLX5_EQ_VEC_COMP_BASE, +}; + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_eq_pagefault { + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_frag_buf buf; + int size; + unsigned int irqn; + u8 eqn; + int nent; + struct list_head list; + struct mlx5_rsc_debug *dbg; + enum mlx5_eq_type type; + union { + struct mlx5_eq_tasklet tasklet_ctx; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct mlx5_eq_pagefault pf_ctx; +#endif + }; +}; + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); +void mlx5_cq_tasklet_cb(unsigned long data); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +#endif diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 21cc9bbc2563..5d11ef92c8b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -54,6 +54,7 @@ #include #include "mlx5_core.h" #include "fs_core.h" +#include "lib/eq.h" #include "lib/mpfs.h" #include "eswitch.h" #include "lib/mlx5.h" @@ -728,7 +729,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } - err = mlx5_eq_init(dev); + err = mlx5_eq_table_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize eq\n"); goto out; @@ -802,7 +803,7 @@ err_tables_cleanup: mlx5_cq_debugfs_cleanup(dev); err_eq_cleanup: - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); out: return err; @@ -823,7 +824,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3fa6d26875fe..4d39adcfb0eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -124,21 +124,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_table_create(struct mlx5_core_dev *dev); -void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); -/* This function should only be called after mlx5_cmd_force_teardown_hca */ -void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); -void mlx5_cq_tasklet_cb(unsigned long data); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcc3f7aa8572..4d6246cb6c19 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -84,18 +84,6 @@ enum { MLX5_MAX_PORTS = 2, }; -enum { - MLX5_EQ_VEC_PAGES = 0, - MLX5_EQ_VEC_CMD = 1, - MLX5_EQ_VEC_ASYNC = 2, - MLX5_EQ_VEC_PFAULT = 3, - MLX5_EQ_VEC_COMP_BASE, -}; - -enum { - MLX5_MAX_IRQ_NAME = 32 -}; - enum { MLX5_ATOMIC_MODE_OFFSET = 16, MLX5_ATOMIC_MODE_IB_COMP = 1, @@ -366,49 +354,6 @@ struct mlx5_frag_buf_ctrl { u8 log_frag_strides; }; -struct mlx5_eq_tasklet { - struct list_head list; - struct list_head process_list; - struct tasklet_struct task; - /* lock on completion tasklet list */ - spinlock_t lock; -}; - -struct mlx5_eq_pagefault { - struct work_struct work; - /* Pagefaults lock */ - spinlock_t lock; - struct workqueue_struct *wq; - mempool_t *pool; -}; - -struct 
mlx5_cq_table { - /* protect radix tree */ - spinlock_t lock; - struct radix_tree_root tree; -}; - -struct mlx5_eq { - struct mlx5_core_dev *dev; - struct mlx5_cq_table cq_table; - __be32 __iomem *doorbell; - u32 cons_index; - struct mlx5_frag_buf buf; - int size; - unsigned int irqn; - u8 eqn; - int nent; - struct list_head list; - struct mlx5_rsc_debug *dbg; - enum mlx5_eq_type type; - union { - struct mlx5_eq_tasklet tasklet_ctx; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq_pagefault pf_ctx; -#endif - }; -}; - struct mlx5_core_psv { u32 psv_idx; struct psv_layout { @@ -475,21 +420,6 @@ struct mlx5_core_srq { u16 uid; }; -struct mlx5_eq_table { - struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq async_eq; - struct mlx5_eq cmd_eq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq pfault_eq; -#endif - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif -}; - struct mlx5_uars_page { void __iomem *map; bool wc; @@ -572,11 +502,6 @@ struct mlx5_core_sriov { int enabled_vfs; }; -struct mlx5_irq_info { - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; -}; - struct mlx5_fc_stats { spinlock_t counters_idr_lock; /* protects counters_idr */ struct idr counters_idr; @@ -594,6 +519,7 @@ struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; struct mlx5_pagefault; +struct mlx5_eq_table; struct mlx5_rate_limit { u32 rate; @@ -643,7 +569,7 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; - struct mlx5_eq_table eq_table; + struct mlx5_eq_table *eq_table; /* pages stuff */ struct workqueue_struct *pg_wq; @@ -1148,6 +1074,9 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev); +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, @@ -1299,10 +1228,4 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline const struct cpumask * -mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) -{ - return dev->priv.eq_table.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; -} - #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 16d760839ceef510cf95cbfadc069c4473c7a277 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:39 -0800 Subject: net/mlx5: EQ, Different EQ types In mlx5 we have three types of usages for EQs, 1. Asynchronous EQs, used internally by mlx5 core for a. FW command completions b. FW page requests c. one EQ for all other Asynchronous events 2. Completion EQs, used for CQ completion (we create one per core) 3. *Special type of EQ (page fault) used for RDMA on demand paging (ODP). *The 3rd type shouldn't be special, at least not in mlx5 core; it is yet another async event EQ with a specific use case. It will be removed in the next two patches, which will move its logic completely into mlx5_ib, as it is rdma specific. In this patch we move the use case (eq type) specific fields out of struct mlx5_eq into new eq type specific structures: struct mlx5_eq_async; struct mlx5_eq_comp; struct mlx5_eq_pagefault; and separate their type specific flows.
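For reference, a short sketch of the embedding pattern this relies on (not part of the diff; the handler name is illustrative, the real one in the diff below is mlx5_eq_comp_int):

	struct mlx5_eq_comp {
		struct mlx5_eq		core; /* Must be first */
		struct mlx5_eq_tasklet	tasklet_ctx;
		struct list_head	list;
	};

	static irqreturn_t comp_irq_handler(int irq, void *eq_ptr)
	{
		struct mlx5_eq_comp *eq_comp = eq_ptr;	/* container view */
		struct mlx5_eq *eq = eq_ptr;		/* core view; valid only
							 * because core is first */

		/* ... poll eq, then tasklet_schedule(&eq_comp->tasklet_ctx.task) */
		return IRQ_HANDLED;
	}

Because the core is the first member, the single dev_id registered with request_irq() serves as both views, so generic EQ code and type specific code can share one pointer.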
In the future we will allow users to create their own generic EQs. For now we will allow only one, for ODP, in the next patches. We will introduce an event listener registration API for those who want to receive mlx5 async events. After that, mlx5 eq handling will be clean of feature/user specific handling. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 376 ++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 53 +-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 4 - include/linux/mlx5/cq.h | 2 +- include/linux/mlx5/driver.h | 10 +- 10 files changed, 270 insertions(+), 199 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 6e55d2f37c6d..713a17ee3751 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -93,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int err; - eq = mlx5_eqn2eq(dev, eqn); + eq = mlx5_eqn2comp_eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); @@ -120,7 +120,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, INIT_LIST_HEAD(&cq->tasklet_ctx.list); /* Add to comp EQ CQ tree to recv comp events */ - err = mlx5_eq_add_cq(eq, cq); + err = mlx5_eq_add_cq(&eq->core, cq); if (err) goto err_cmd; @@ -140,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cq_add: - mlx5_eq_del_cq(eq, cq); + mlx5_eq_del_cq(&eq->core, cq); err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); @@ -162,7 +162,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (err) return err; - err = mlx5_eq_del_cq(cq->eq, cq); + err = mlx5_eq_del_cq(&cq->eq->core, cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c23caade31bf..0d495a6b3949 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -320,7 +320,7 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv) static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_eq_synchronize_async_irq(priv->mdev); } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -4117,17 +4117,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, struct mlx5e_txqsq *sq) { - struct mlx5_eq *eq = sq->cq.mcq.eq; + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; u32 eqe_count; netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->eqn, eq->cons_index, eq->irqn); + eq->core.eqn, eq->core.cons_index, eq->core.irqn); eqe_count = mlx5_eq_poll_irq_disabled(eq); if (!eqe_count) return false; - netdev_err(dev, "Recover %d eqes
on EQ 0x%x\n", eqe_count, eq->eqn); + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); sq->channel->stats->eq_rearm++; return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 32ce20221c44..252c9f0569b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -72,13 +72,16 @@ struct mlx5_irq_info { }; struct mlx5_eq_table { - struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq async_eq; - struct mlx5_eq cmd_eq; + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq pfault_eq; + struct mlx5_eq_pagefault pfault_eq; #endif + struct mutex lock; /* sync async eqs creations */ + u8 num_async_eqs; int num_comp_vectors; struct mlx5_irq_info *irq_info; #ifdef CONFIG_RFS_ACCEL @@ -224,24 +227,24 @@ static void eqe_pf_action(struct work_struct *work) struct mlx5_pagefault *pfault = container_of(work, struct mlx5_pagefault, work); - struct mlx5_eq *eq = pfault->eq; + struct mlx5_eq_pagefault *eq = pfault->eq; - mlx5_core_page_fault(eq->dev, pfault); - mempool_free(pfault, eq->pf_ctx.pool); + mlx5_core_page_fault(eq->core.dev, pfault); + mempool_free(pfault, eq->pool); } -static void eq_pf_process(struct mlx5_eq *eq) +static void eq_pf_process(struct mlx5_eq_pagefault *eq) { - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_core_dev *dev = eq->core.dev; struct mlx5_eqe_page_fault *pf_eqe; struct mlx5_pagefault *pfault; struct mlx5_eqe *eqe; int set_ci = 0; - while ((eqe = next_eqe_sw(eq))) { - pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); + while ((eqe = next_eqe_sw(&eq->core))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); if (!pfault) { - schedule_work(&eq->pf_ctx.work); + schedule_work(&eq->work); break; } @@ -311,30 +314,30 @@ static void eq_pf_process(struct mlx5_eq *eq) pfault->eq = eq; INIT_WORK(&pfault->work, eqe_pf_action); - queue_work(eq->pf_ctx.wq, &pfault->work); + queue_work(eq->wq, &pfault->work); - ++eq->cons_index; + ++eq->core.cons_index; ++set_ci; if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); + eq_update_ci(&eq->core, 0); set_ci = 0; } } - eq_update_ci(eq, 1); + eq_update_ci(&eq->core, 1); } static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_pagefault *eq = eq_ptr; unsigned long flags; - if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { + if (spin_trylock_irqsave(&eq->lock, flags)) { eq_pf_process(eq); - spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); + spin_unlock_irqrestore(&eq->lock, flags); } else { - schedule_work(&eq->pf_ctx.work); + schedule_work(&eq->work); } return IRQ_HANDLED; @@ -352,35 +355,61 @@ static void mempool_refill(mempool_t *pool) static void eq_pf_action(struct work_struct *work) { - struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); + struct mlx5_eq_pagefault *eq = + container_of(work, struct mlx5_eq_pagefault, work); - mempool_refill(eq->pf_ctx.pool); + mempool_refill(eq->pool); - spin_lock_irq(&eq->pf_ctx.lock); + spin_lock_irq(&eq->lock); eq_pf_process(eq); - spin_unlock_irq(&eq->pf_ctx.lock); + spin_unlock_irq(&eq->lock); } -static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) +static int +create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) { - spin_lock_init(&pf_ctx->lock); - INIT_WORK(&pf_ctx->work, eq_pf_action); + 
int err; - pf_ctx->wq = alloc_workqueue(name, - WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, - MLX5_NUM_CMD_EQE); - if (!pf_ctx->wq) + spin_lock_init(&eq->lock); + INIT_WORK(&eq->work, eq_pf_action); + + eq->pool = mempool_create_kmalloc_pool(MLX5_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) return -ENOMEM; - pf_ctx->pool = mempool_create_kmalloc_pool - (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); - if (!pf_ctx->pool) + eq->wq = alloc_workqueue("mlx5_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + err = mlx5_create_async_eq(dev, &eq->core, MLX5_NUM_ASYNC_EQE, + 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + "mlx5_page_fault_eq", mlx5_eq_pf_int); + if (err) goto err_wq; return 0; err_wq: - destroy_workqueue(pf_ctx->wq); - return -ENOMEM; + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} + +static int destroy_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) +{ + int err; + + err = mlx5_destroy_async_eq(dev, &eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; } int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, @@ -444,37 +473,88 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) +static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) { struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); return; } - ++cq->arm_sn; - - cq->comp(cq); + cq->event(cq, event_type); mlx5_cq_put(cq); } -static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) +static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) { - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); + struct mlx5_eq_comp *eq_comp = eq_ptr; + struct mlx5_eq *eq = eq_ptr; + struct mlx5_eqe *eqe; + int set_ci = 0; + u32 cqn = -1; - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); - return; + while ((eqe = next_eqe_sw(eq))) { + struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + ++cq->arm_sn; + cq->comp(cq); + mlx5_cq_put(cq); + } else { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + } + + ++eq->cons_index; + ++set_ci; + + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } } - cq->event(cq, event_type); + eq_update_ci(eq, 1); - mlx5_cq_put(cq); + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); + + return IRQ_HANDLED; } -static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. 
+ */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) +{ + u32 count_eqe; + + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(eq->core.irqn, eq); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); + + return count_eqe; +} + +static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; struct mlx5_core_dev *dev = eq->dev; @@ -494,10 +574,6 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); switch (eqe->type) { - case MLX5_EVENT_TYPE_COMP: - cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; - mlx5_eq_cq_completion(eq, cqn); - break; case MLX5_EVENT_TYPE_DCT_DRAINED: rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); @@ -619,30 +695,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) eq_update_ci(eq, 1); - if (cqn != -1) - tasklet_schedule(&eq->tasklet_ctx.task); - return IRQ_HANDLED; } -/* Some architectures don't latch interrupts when they are disabled, so using - * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to - * avoid losing them. It is not recommended to use it, unless this is the last - * resort. - */ -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) -{ - u32 count_eqe; - - disable_irq(eq->irqn); - count_eqe = eq->cons_index; - mlx5_eq_int(eq->irqn, eq); - count_eqe = eq->cons_index - count_eqe; - enable_irq(eq->irqn); - - return count_eqe; -} - static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; @@ -656,13 +711,12 @@ static void init_eq_buf(struct mlx5_eq *eq) static int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, enum mlx5_eq_type type) + int nent, u64 mask, const char *name, irq_handler_t handler) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - irq_handler_t handler; __be64 *pas; void *eqc; int inlen; @@ -674,20 +728,12 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->type = type; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) - handler = mlx5_eq_pf_int; - else -#endif - handler = mlx5_eq_int; - init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -732,21 +778,6 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) { - err = init_pf_ctx(&eq->pf_ctx, name); - if (err) - goto err_irq; - } else -#endif - { - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); - } - /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -780,15 +811,6 @@ static int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) eq->eqn); synchronize_irq(eq->irqn); - if (eq->type == MLX5_EQ_TYPE_COMP) { - tasklet_disable(&eq->tasklet_ctx.task); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - } else if (eq->type == 
MLX5_EQ_TYPE_PF) { - cancel_work_sync(&eq->pf_ctx.work); - destroy_workqueue(eq->pf_ctx.wq); - mempool_destroy(eq->pf_ctx.pool); -#endif - } mlx5_buf_free(dev, &eq->buf); return err; @@ -841,8 +863,15 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) err = mlx5_eq_debugfs_init(dev); if (err) - kvfree(eq_table); + goto kvfree_eq_table; + mutex_init(&eq_table->lock); + + return 0; + +kvfree_eq_table: + kvfree(eq_table); + dev->priv.eq_table = NULL; return err; } @@ -854,6 +883,43 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ +int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int nent, u64 mask, const char *name, irq_handler_t handler) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + u8 vecdix; + int err; + + mutex_lock(&eq_table->lock); + if (eq_table->num_async_eqs >= MLX5_EQ_MAX_ASYNC_EQS) { + err = -ENOSPC; + goto unlock; + } + + vecdix = eq_table->num_async_eqs + 1; + + err = mlx5_create_map_eq(dev, eq, vecdix, nent, mask, name, handler); + if (!err) + eq_table->num_async_eqs++; + +unlock: + mutex_unlock(&eq_table->lock); + return err; +} + +int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + err = mlx5_destroy_unmap_eq(dev, eq); + if (!err) + eq_table->num_async_eqs--; + mutex_unlock(&eq_table->lock); + return err; +} + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -887,9 +953,9 @@ static int create_async_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); - err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, - MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); + err = mlx5_create_async_eq(dev, &table->cmd_eq, MLX5_NUM_CMD_EQE, + 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", + mlx5_eq_async_int); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; @@ -897,19 +963,15 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); - err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, - MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); + err = mlx5_create_async_eq(dev, &table->async_eq, MLX5_NUM_ASYNC_EQE, + async_event_mask, "mlx5_async_eq", mlx5_eq_async_int); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_map_eq(dev, &table->pages_eq, - MLX5_EQ_VEC_PAGES, - /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - MLX5_EQ_TYPE_ASYNC); + err = mlx5_create_async_eq(dev, &table->pages_eq, /* TODO: sriov max_vf + */ 1, + 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", mlx5_eq_async_int); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -917,12 +979,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_create_map_eq(dev, &table->pfault_eq, - MLX5_EQ_VEC_PFAULT, - MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", - MLX5_EQ_TYPE_PF); + err = create_pf_eq(dev, &table->pfault_eq); if (err) { mlx5_core_warn(dev, "failed to create page fault EQ %d\n", err); @@ -932,17 +989,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err3: - mlx5_destroy_unmap_eq(dev, 
&table->pages_eq); + mlx5_destroy_async_eq(dev, &table->pages_eq); #else return err; #endif err2: - mlx5_destroy_unmap_eq(dev, &table->async_eq); + mlx5_destroy_async_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + mlx5_destroy_async_eq(dev, &table->cmd_eq); return err; } @@ -953,25 +1010,25 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); + err = destroy_pf_eq(dev, &table->pfault_eq); if (err) mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", err); } #endif - err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + err = mlx5_destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = mlx5_destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + err = mlx5_destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -982,6 +1039,16 @@ struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) return &dev->priv.eq_table->async_eq; } +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); +} + /* Completion EQs */ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -1048,7 +1115,7 @@ static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq, *n; + struct mlx5_eq_comp *eq, *n; clear_comp_irqs_affinity_hints(dev); @@ -1060,9 +1127,10 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); - if (mlx5_destroy_unmap_eq(dev, eq)) - mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", - eq->eqn); + if (mlx5_destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); kfree(eq); } } @@ -1071,7 +1139,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; char name[MLX5_MAX_IRQ_NAME]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int ncomp_vec; int nent; int err; @@ -1094,17 +1162,23 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) goto clean; } + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + #ifdef CONFIG_RFS_ACCEL irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, eq, vecidx, nent, 0, - name, MLX5_EQ_TYPE_COMP); + err = mlx5_create_map_eq(dev, &eq->core, vecidx, nent, 0, + name, mlx5_eq_comp_int); if (err) { kfree(eq); goto clean; } - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for 
mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } @@ -1126,14 +1200,14 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq, *n; + struct mlx5_eq_comp *eq, *n; int err = -ENOENT; int i = 0; list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; + *eqn = eq->core.eqn; + *irqn = eq->core.irqn; err = 0; break; } @@ -1166,13 +1240,13 @@ struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) #endif } -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (eq->eqn == eqn) + if (eq->core.eqn == eqn) return eq; } @@ -1183,7 +1257,7 @@ struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; clear_comp_irqs_affinity_hints(dev); @@ -1194,14 +1268,14 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) } #endif list_for_each_entry(eq, &table->comp_eqs_list, list) - free_irq(eq->irqn, eq); + free_irq(eq->core.irqn, eq); free_irq(table->pages_eq.irqn, &table->pages_eq); free_irq(table->async_eq.irqn, &table->async_eq); free_irq(table->cmd_eq.irqn, &table->cmd_eq); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.irqn, &table->pfault_eq); + free_irq(table->pfault_eq.core.irqn, &table->pfault_eq.core); #endif pci_free_irq_vectors(dev->pdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 324606227b1a..2346b6ba3d54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1568,7 +1568,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_eq_synchronize_async_irq(esw->dev); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index b5be6f0b9ed5..066883003aea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -85,7 +85,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) u64 vector; /* wait for pending handlers to complete */ - synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); + mlx5_eq_synchronize_cmd_irq(dev); spin_lock_irqsave(&dev->cmd.alloc_lock, flags); vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); if (!vector) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 48ee37797b3f..706d58383dbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -8,11 +8,8 @@ #define MLX5_MAX_IRQ_NAME (32) enum { - MLX5_EQ_VEC_PAGES = 0, - MLX5_EQ_VEC_CMD = 1, - MLX5_EQ_VEC_ASYNC = 2, - MLX5_EQ_VEC_PFAULT = 
3, - MLX5_EQ_VEC_COMP_BASE, + MLX5_EQ_MAX_ASYNC_EQS = 4, /* mlx5_core needs at least 3 */ + MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, }; struct mlx5_eq_tasklet { @@ -22,13 +19,6 @@ struct mlx5_eq_tasklet { spinlock_t lock; /* lock completion tasklet list */ }; -struct mlx5_eq_pagefault { - struct work_struct work; - spinlock_t lock; /* Pagefaults spinlock */ - struct workqueue_struct *wq; - mempool_t *pool; -}; - struct mlx5_cq_table { spinlock_t lock; /* protect radix tree */ struct radix_tree_root tree; @@ -44,29 +34,48 @@ struct mlx5_eq { unsigned int irqn; u8 eqn; int nent; - struct list_head list; struct mlx5_rsc_debug *dbg; - enum mlx5_eq_type type; - union { - struct mlx5_eq_tasklet tasklet_ctx; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq_pagefault pf_ctx; -#endif - }; +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; /* Must be first */ + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +struct mlx5_eq_pagefault { + struct mlx5_eq core; /* Must be first */ + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; }; int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int nent, u64 mask, const char *name, + irq_handler_t handler); +int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); + int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5d11ef92c8b6..3de83fe65f2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -53,8 +53,8 @@ #endif #include #include "mlx5_core.h" -#include "fs_core.h" #include "lib/eq.h" +#include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" #include "lib/mlx5.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 4d39adcfb0eb..4728b027cb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -125,10 +125,6 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); void mlx5_cmd_comp_handler(struct mlx5_core_dev 
*dev, u64 vec, bool forced); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 31a750570c38..28b757a64029 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -60,7 +60,7 @@ struct mlx5_core_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; u16 uid; }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4d6246cb6c19..fe9b552aa649 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -210,14 +210,6 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; -enum mlx5_eq_type { - MLX5_EQ_TYPE_COMP, - MLX5_EQ_TYPE_ASYNC, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - MLX5_EQ_TYPE_PF, -#endif -}; - struct mlx5_bfreg_info { u32 *sys_pages; int num_low_latency_bfregs; @@ -692,7 +684,7 @@ struct mlx5_pagefault { } rdma; }; - struct mlx5_eq *eq; + struct mlx5_eq_pagefault *eq; struct work_struct work; }; -- cgit v1.2.3 From 7701707cb94ed4d1e63ae4fa5ef62a2345ef9db7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:40 -0800 Subject: net/mlx5: EQ, Generic EQ Add mlx5_eq_{create/destroy}_generic APIs and EQE access methods for mlx5 core consumers' generic EQs. This API will be used in a downstream patch to move the page fault (RDMA ODP) EQ logic into the mlx5_ib rdma driver, which will then use a generic EQ. Current mlx5 EQ allocation scheme: on load, mlx5 allocates 4 (for async) + #cores (for data completions) MSIX vectors. mlx5 core will assign 3 MSIX vectors for internal async EQs and will use all of the #cores MSIX vectors for completion EQs (one vector is reserved for a generic EQ). After this patch an external user (e.g. mlx5_ib) of mlx5_core can use this new API to create new generic EQs with the reserved MSIX vector index for that EQ.
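A minimal usage sketch of the new API (illustrative only; "my_eq", my_ctx and my_handler are hypothetical names, and the real consumer is only added in a later patch):

	struct mlx5_eq_param param = {
		.index   = MLX5_EQ_PFAULT_IDX,	/* the reserved async vector */
		.mask    = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
		.nent    = MLX5_NUM_ASYNC_EQE,
		.context = my_ctx,		/* dev_id passed to request_irq() */
		.handler = my_handler,		/* irq_handler_t */
	};
	struct mlx5_eq *eq;
	struct mlx5_eqe *eqe;
	u32 cc = 0;

	eq = mlx5_eq_create_generic(dev, "my_eq", &param);
	if (IS_ERR(eq))
		return PTR_ERR(eq);

	/* Consume EQEs, e.g. from my_handler or a work item: */
	while ((eqe = mlx5_eq_get_eqe(eq, cc)))
		cc++;				/* process eqe, then account for it */
	mlx5_eq_update_ci(eq, cc, true);	/* publish CI and re-arm the EQ */

	mlx5_eq_destroy_generic(dev, eq);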
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 243 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 12 +- include/linux/mlx5/eq.h | 39 ++++ 3 files changed, 221 insertions(+), 73 deletions(-) create mode 100644 include/linux/mlx5/eq.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 252c9f0569b1..ec1f5018546e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #ifdef CONFIG_RFS_ACCEL #include @@ -69,6 +70,7 @@ enum { struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; + void *context; /* dev_id provided to request_irq */ }; struct mlx5_eq_table { @@ -81,7 +83,6 @@ struct mlx5_eq_table { struct mlx5_eq_pagefault pfault_eq; #endif struct mutex lock; /* sync async eqs creations */ - u8 num_async_eqs; int num_comp_vectors; struct mlx5_irq_info *irq_info; #ifdef CONFIG_RFS_ACCEL @@ -229,19 +230,19 @@ static void eqe_pf_action(struct work_struct *work) work); struct mlx5_eq_pagefault *eq = pfault->eq; - mlx5_core_page_fault(eq->core.dev, pfault); + mlx5_core_page_fault(eq->core->dev, pfault); mempool_free(pfault, eq->pool); } static void eq_pf_process(struct mlx5_eq_pagefault *eq) { - struct mlx5_core_dev *dev = eq->core.dev; + struct mlx5_core_dev *dev = eq->core->dev; struct mlx5_eqe_page_fault *pf_eqe; struct mlx5_pagefault *pfault; struct mlx5_eqe *eqe; int set_ci = 0; - while ((eqe = next_eqe_sw(&eq->core))) { + while ((eqe = next_eqe_sw(eq->core))) { pfault = mempool_alloc(eq->pool, GFP_ATOMIC); if (!pfault) { schedule_work(&eq->work); @@ -316,16 +317,16 @@ static void eq_pf_process(struct mlx5_eq_pagefault *eq) INIT_WORK(&pfault->work, eqe_pf_action); queue_work(eq->wq, &pfault->work); - ++eq->core.cons_index; + ++eq->core->cons_index; ++set_ci; if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(&eq->core, 0); + eq_update_ci(eq->core, 0); set_ci = 0; } } - eq_update_ci(&eq->core, 1); + eq_update_ci(eq->core, 1); } static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) @@ -368,6 +369,7 @@ static void eq_pf_action(struct work_struct *work) static int create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) { + struct mlx5_eq_param param = {}; int err; spin_lock_init(&eq->lock); @@ -386,11 +388,19 @@ create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) goto err_mempool; } - err = mlx5_create_async_eq(dev, &eq->core, MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", mlx5_eq_pf_int); - if (err) + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_NUM_ASYNC_EQE, + .context = eq, + .handler = mlx5_eq_pf_int + }; + + eq->core = mlx5_eq_create_generic(dev, "mlx5_page_fault_eq", ¶m); + if (IS_ERR(eq->core)) { + err = PTR_ERR(eq->core); goto err_wq; + } return 0; err_wq: @@ -404,7 +414,7 @@ static int destroy_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq { int err; - err = mlx5_destroy_async_eq(dev, &eq->core); + err = mlx5_eq_destroy_generic(dev, eq->core); cancel_work_sync(&eq->work); destroy_workqueue(eq->wq); mempool_destroy(eq->pool); @@ -710,25 +720,29 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int 
nent, u64 mask, const char *name, irq_handler_t handler) +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, + struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + u8 vecidx = param->index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + if (eq_table->irq_info[vecidx].context) + return -EEXIST; + /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) @@ -749,7 +763,7 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - MLX5_SET64(create_eq_in, in, event_bitmask, mask); + MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -764,13 +778,15 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq_table->irq_info[vecidx].context = param->context; + eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, handler, 0, - eq_table->irq_info[vecidx].name, eq); + err = request_irq(eq->irqn, param->handler, 0, + eq_table->irq_info[vecidx].name, param->context); if (err) goto err_eq; @@ -799,12 +815,19 @@ err_buf: return err; } -static int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_info *irq_info; int err; + irq_info = &eq_table->irq_info[eq->vecidx]; + mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, eq); + + free_irq(eq->irqn, irq_info->context); + irq_info->context = NULL; + err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -883,48 +906,38 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - int nent, u64 mask, const char *name, irq_handler_t handler) +static int create_async_eq(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - u8 vecdix; int err; mutex_lock(&eq_table->lock); - if (eq_table->num_async_eqs >= MLX5_EQ_MAX_ASYNC_EQS) { + if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { err = -ENOSPC; goto unlock; } - vecdix = eq_table->num_async_eqs + 1; - - err = mlx5_create_map_eq(dev, eq, vecdix, nent, mask, name, handler); - if (!err) - eq_table->num_async_eqs++; - + err = create_map_eq(dev, eq, name, param); unlock: mutex_unlock(&eq_table->lock); return err; } -int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { struct mlx5_eq_table *eq_table 
= dev->priv.eq_table; int err; mutex_lock(&eq_table->lock); - err = mlx5_destroy_unmap_eq(dev, eq); - if (!err) - eq_table->num_async_eqs--; + err = destroy_unmap_eq(dev, eq); mutex_unlock(&eq_table->lock); return err; } -static int create_async_eqs(struct mlx5_core_dev *dev) +static u64 gather_async_events_mask(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; - int err; if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); @@ -953,9 +966,23 @@ static int create_async_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); - err = mlx5_create_async_eq(dev, &table->cmd_eq, MLX5_NUM_CMD_EQE, - 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", - mlx5_eq_async_int); + return async_event_mask; +} + +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_CMD_IDX, + .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .nent = MLX5_NUM_CMD_EQE, + .context = &table->cmd_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; @@ -963,15 +990,27 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); - err = mlx5_create_async_eq(dev, &table->async_eq, MLX5_NUM_ASYNC_EQE, - async_event_mask, "mlx5_async_eq", mlx5_eq_async_int); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_ASYNC_IDX, + .mask = gather_async_events_mask(dev), + .nent = MLX5_NUM_ASYNC_EQE, + .context = &table->async_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_async_eq(dev, &table->pages_eq, /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", mlx5_eq_async_int); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PAGEREQ_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .nent = /* TODO: sriov max_vf + */ 1, + .context = &table->pages_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -989,17 +1028,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err3: - mlx5_destroy_async_eq(dev, &table->pages_eq); + destroy_async_eq(dev, &table->pages_eq); #else return err; #endif err2: - mlx5_destroy_async_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_async_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq); return err; } @@ -1017,18 +1056,18 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) } #endif - err = mlx5_destroy_async_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_async_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = mlx5_destroy_async_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq); if (err) 
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -1049,6 +1088,77 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); } +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + err = create_async_eq(dev, name, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; +} +EXPORT_SYMBOL(mlx5_eq_destroy_generic); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) +{ + u32 ci = eq->cons_index + cc; + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (eq->nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + if (eqe) + dma_rmb(); + + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + /* Completion EQs */ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -1127,7 +1237,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); - if (mlx5_destroy_unmap_eq(dev, &eq->core)) + if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); tasklet_disable(&eq->tasklet_ctx.task); @@ -1155,6 +1265,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) #endif for (i = 0; i < ncomp_vec; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -1172,8 +1283,14 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, &eq->core, vecidx, nent, 0, - name, mlx5_eq_comp_int); + param = (struct mlx5_eq_param) { + .index = vecidx, + .mask = 0, + .nent = nent, + .context = &eq->core, + .handler = mlx5_eq_comp_int + }; + err = create_map_eq(dev, &eq->core, name, ¶m); if (err) { kfree(eq); goto clean; @@ -1257,7 +1374,7 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq; + int i, max_eqs; clear_comp_irqs_affinity_hints(dev); @@ -1267,16 +1384,16 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) table->rmap = NULL; } #endif - list_for_each_entry(eq, &table->comp_eqs_list, list) - free_irq(eq->core.irqn, eq); - free_irq(table->pages_eq.irqn, &table->pages_eq); - free_irq(table->async_eq.irqn, &table->async_eq); - free_irq(table->cmd_eq.irqn, 
&table->cmd_eq); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.core.irqn, &table->pfault_eq.core); -#endif + mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + for (i = max_eqs - 1; i >= 0; i--) { + if (!table->irq_info[i].context) + continue; + free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); + table->irq_info[i].context = NULL; + } + mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 706d58383dbd..db32057ad054 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,11 +7,6 @@ #define MLX5_MAX_IRQ_NAME (32) -enum { - MLX5_EQ_MAX_ASYNC_EQS = 4, /* mlx5_core needs at least 3 */ - MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, -}; - struct mlx5_eq_tasklet { struct list_head list; struct list_head process_list; @@ -31,6 +26,7 @@ struct mlx5_eq { u32 cons_index; struct mlx5_frag_buf buf; int size; + unsigned int vecidx; unsigned int irqn; u8 eqn; int nent; @@ -44,7 +40,7 @@ struct mlx5_eq_comp { }; struct mlx5_eq_pagefault { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq *core; struct work_struct work; spinlock_t lock; /* Pagefaults spinlock */ struct workqueue_struct *wq; @@ -55,10 +51,6 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); -int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - int nent, u64 mask, const char *name, - irq_handler_t handler); -int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h new file mode 100644 index 000000000000..c733673ba5f6 --- /dev/null +++ b/include/linux/mlx5/eq.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef MLX5_CORE_EQ_H +#define MLX5_CORE_EQ_H + +#include + +enum { + MLX5_EQ_PAGEREQ_IDX = 0, + MLX5_EQ_CMD_IDX = 1, + MLX5_EQ_ASYNC_IDX = 2, + /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */ + MLX5_EQ_PFAULT_IDX = 3, + MLX5_EQ_MAX_ASYNC_EQS, + /* completion eqs vector indices start here */ + MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, +}; + +struct mlx5_eq; + +struct mlx5_eq_param { + u8 index; + int nent; + u64 mask; + void *context; + irq_handler_t handler; +}; + +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param); +int +mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); + +#endif /* MLX5_CORE_EQ_H */ -- cgit v1.2.3 From d5d284b829a6eb7127df24d1bd3896a698981e62 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:41 -0800 Subject: {net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA Use the new generic EQ API to move all ODP RDMA data structures and logic from mlx5 core driver into mlx5_ib driver.
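A condensed sketch of the consumer-side pattern this move relies on, as mlx5_ib now drives the page fault EQ through the generic API (names such as "my_pf_handler", "my_ctx" and "my_pf_eq" are illustrative placeholders, not identifiers from the patch):

	struct mlx5_eqe *eqe;
	struct mlx5_eq *eq;
	int cc = 0;

	struct mlx5_eq_param param = {
		.index = MLX5_EQ_PFAULT_IDX,
		.mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
		.nent = 0x1000,
		.context = my_ctx,
		.handler = my_pf_handler,
	};

	eq = mlx5_eq_create_generic(mdev, "my_pf_eq", &param);
	if (IS_ERR(eq))
		return PTR_ERR(eq);

	/* in the irq handler / polling path */
	while ((eqe = mlx5_eq_get_eqe(eq, cc)))
		cc = mlx5_eq_update_cc(eq, ++cc);	/* consume eqe here */
	mlx5_eq_update_ci(eq, cc, true);		/* update CI and re-arm */

	mlx5_eq_destroy_generic(mdev, eq);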
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Acked-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 10 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 15 +- drivers/infiniband/hw/mlx5/odp.c | 281 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 34 --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 252 ------------------ drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 8 - drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 - include/linux/mlx5/driver.h | 49 ---- include/linux/mlx5/eq.h | 21 ++ 10 files changed, 308 insertions(+), 381 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6fbc0cba1bac..fcf4a0328a90 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6040,6 +6040,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } +void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_odp_cleanup_one(dev); +} + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { @@ -6225,7 +6230,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, mlx5_ib_stage_odp_init, - NULL), + mlx5_ib_stage_odp_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6395,9 +6400,6 @@ static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, .event = mlx5_ib_event, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - .pfault = mlx5_ib_pfault, -#endif .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b651a7a6fde9..27999fd32356 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -880,6 +880,15 @@ struct mlx5_ib_lb_state { bool enabled; }; +struct mlx5_ib_pf_eq { + struct mlx5_ib_dev *dev; + struct mlx5_eq *core; + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; const struct uverbs_object_tree_def *driver_trees[7]; @@ -902,6 +911,8 @@ struct mlx5_ib_dev { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; + struct mlx5_ib_pf_eq odp_pf_eq; + /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. 
@@ -1158,9 +1169,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -1175,6 +1185,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } +static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7d784b40e017..416d141322a0 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -37,6 +37,46 @@ #include "mlx5_ib.h" #include "cmd.h" +#include + +/* Contains the details of a pagefault. */ +struct mlx5_pagefault { + u32 bytes_committed; + u32 token; + u8 event_subtype; + u8 type; + union { + /* Initiator or send message responder pagefault details. */ + struct { + /* Received packet size, only valid for responders. */ + u32 packet_size; + /* + * Number of resource holding WQE, depends on type. + */ + u32 wq_num; + /* + * WQE index. Refers to either the send queue or + * receive queue, according to event_subtype. + */ + u16 wqe_index; + } wqe; + /* RDMA responder pagefault details */ + struct { + u32 r_key; + /* + * Received packet size, minimal size page fault + * resolution required for forward progress. + */ + u32 packet_size; + u32 rdma_op_len; + u64 rdma_va; + } rdma; + }; + + struct mlx5_ib_pf_eq *eq; + struct work_struct work; +}; + #define MAX_PREFETCH_LEN (4*1024*1024U) /* Timeout in ms to wait for an active mmu notifier to complete when handling @@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? 
pfault->wqe.wq_num : pfault->token; - int ret = mlx5_core_page_fault_resume(dev->mdev, - pfault->token, - wq_num, - pfault->type, - error); - if (ret) - mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n", - wq_num); + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + int err; + + MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type); + MLX5_SET(page_fault_resume_in, in, token, pfault->token); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, error, !!error); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", + wq_num, err); } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, @@ -1196,10 +1242,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } } -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault) +static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - struct mlx5_ib_dev *dev = context; u8 event_subtype = pfault->event_subtype; switch (event_subtype) { @@ -1216,6 +1260,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, } } +static void mlx5_ib_eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_ib_pf_eq *eq = pfault->eq; + + mlx5_ib_pfault(eq->dev, pfault); + mempool_free(pfault, eq->pool); +} + +static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int cc = 0; + + while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->work); + break; + } + + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + 
pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_ib_warn(eq->dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action); + queue_work(eq->wq, &pfault->work); + + cc = mlx5_eq_update_cc(eq->core, ++cc); + } + + mlx5_eq_update_ci(eq->core, cc, 1); +} + +static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_ib_pf_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->lock, flags)) { + mlx5_ib_eq_pf_process(eq); + spin_unlock_irqrestore(&eq->lock, flags); + } else { + schedule_work(&eq->work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Cheap workaround. + */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void mlx5_ib_eq_pf_action(struct work_struct *work) +{ + struct mlx5_ib_pf_eq *eq = + container_of(work, struct mlx5_ib_pf_eq, work); + + mempool_refill(eq->pool); + + spin_lock_irq(&eq->lock); + mlx5_ib_eq_pf_process(eq); + spin_unlock_irq(&eq->lock); +} + +enum { + MLX5_IB_NUM_PF_EQE = 0x1000, + MLX5_IB_NUM_PF_DRAIN = 64, +}; + +static int +mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eq_param param = {}; + int err; + + INIT_WORK(&eq->work, mlx5_ib_eq_pf_action); + spin_lock_init(&eq->lock); + eq->dev = dev; + + eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) + return -ENOMEM; + + eq->wq = alloc_workqueue("mlx5_ib_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_IB_NUM_PF_EQE, + .context = eq, + .handler = mlx5_ib_eq_pf_int + }; + eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); + if (IS_ERR(eq->core)) { + err = PTR_ERR(eq->core); + goto err_wq; + } + + return 0; +err_wq: + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} + +static int +mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + int err; + + err = mlx5_eq_destroy_generic(dev->mdev, eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; +} + void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) { if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) @@ -1244,7 +1485,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { - int ret; + int ret = 0; if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1254,7 +1495,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - return 0; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return ret; + + ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); + + return ret; +} + +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; + + mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); } int mlx5_ib_odp_init(void) @@ -1264,4 +1518,3 @@ int mlx5_ib_odp_init(void) return 0; } - diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 37ba7c78859d..7eedbea38a78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -139,17 +139,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev_ctx->intf->pfault) { - if (priv->pfault) { - mlx5_core_err(dev, "multiple page fault handlers not supported"); - } else { - priv->pfault_ctx = dev_ctx->context; - priv->pfault = dev_ctx->intf->pfault; - } - } -#endif spin_unlock_irq(&priv->ctx_lock); } @@ -179,15 +168,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - spin_lock_irq(&priv->ctx_lock); - if (priv->pfault == dev_ctx->intf->pfault) - priv->pfault = NULL; - spin_unlock_irq(&priv->ctx_lock); - - synchronize_srcu(&priv->pfault_srcu); -#endif - spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -447,20 +427,6 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_unlock_irqrestore(&priv->ctx_lock, flags); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault) -{ - struct mlx5_priv *priv = &dev->priv; - int srcu_idx; - - srcu_idx = srcu_read_lock(&priv->pfault_srcu); - if (priv->pfault) - priv->pfault(dev, priv->pfault_ctx, pfault); - srcu_read_unlock(&priv->pfault_srcu, srcu_idx); -} -#endif - void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ec1f5018546e..895401609c63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -56,13 +56,6 @@ enum { MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, }; -enum { - MLX5_NUM_SPARE_EQE = 0x80, - MLX5_NUM_ASYNC_EQE = 0x1000, - MLX5_NUM_CMD_EQE = 32, - MLX5_NUM_PF_DRAIN = 64, -}; - enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; @@ -79,9 +72,6 @@ struct mlx5_eq_table { struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq_pagefault pfault_eq; -#endif struct mutex lock; /* sync async eqs creations */ int num_comp_vectors; struct mlx5_irq_info *irq_info; @@ -222,224 +212,6 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) mb(); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -static void eqe_pf_action(struct work_struct *work) -{ - struct mlx5_pagefault *pfault = container_of(work, - struct mlx5_pagefault, - work); - struct mlx5_eq_pagefault *eq = pfault->eq; - - mlx5_core_page_fault(eq->core->dev, pfault); - mempool_free(pfault, eq->pool); -} - -static void eq_pf_process(struct mlx5_eq_pagefault *eq) -{ - struct mlx5_core_dev *dev = eq->core->dev; - struct mlx5_eqe_page_fault *pf_eqe; - struct mlx5_pagefault *pfault; - struct mlx5_eqe *eqe; - int set_ci = 0; - - while ((eqe = next_eqe_sw(eq->core))) { - pfault = mempool_alloc(eq->pool, GFP_ATOMIC); - if (!pfault) { - schedule_work(&eq->work); - break; - } - - dma_rmb(); - pf_eqe = &eqe->data.page_fault; - pfault->event_subtype = eqe->sub_type; - pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", - eqe->sub_type, 
pfault->bytes_committed); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault->type = - be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; - pfault->token = - be32_to_cpu(pf_eqe->rdma.pftype_token) & - MLX5_24BIT_MASK; - pfault->rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault->rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault->rdma.rdma_op_len = - be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault->rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", - pfault->type, pfault->token, - pfault->rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", - pfault->rdma.rdma_op_len, - pfault->rdma.rdma_va); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault->type = - (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; - pfault->token = - be32_to_cpu(pf_eqe->wqe.token); - pfault->wqe.wq_num = - be32_to_cpu(pf_eqe->wqe.pftype_wq) & - MLX5_24BIT_MASK; - pfault->wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault->wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", - pfault->type, pfault->token, - pfault->wqe.wq_num, - pfault->wqe.wqe_index); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx\n", - eqe->sub_type); - /* Unsupported page faults should still be - * resolved by the page fault handler - */ - } - - pfault->eq = eq; - INIT_WORK(&pfault->work, eqe_pf_action); - queue_work(eq->wq, &pfault->work); - - ++eq->core->cons_index; - ++set_ci; - - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq->core, 0); - set_ci = 0; - } - } - - eq_update_ci(eq->core, 1); -} - -static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) -{ - struct mlx5_eq_pagefault *eq = eq_ptr; - unsigned long flags; - - if (spin_trylock_irqsave(&eq->lock, flags)) { - eq_pf_process(eq); - spin_unlock_irqrestore(&eq->lock, flags); - } else { - schedule_work(&eq->work); - } - - return IRQ_HANDLED; -} - -/* mempool_refill() was proposed but unfortunately wasn't accepted - * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html - * Cheap workaround.
- */ -static void mempool_refill(mempool_t *pool) -{ - while (pool->curr_nr < pool->min_nr) - mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); -} - -static void eq_pf_action(struct work_struct *work) -{ - struct mlx5_eq_pagefault *eq = - container_of(work, struct mlx5_eq_pagefault, work); - - mempool_refill(eq->pool); - - spin_lock_irq(&eq->lock); - eq_pf_process(eq); - spin_unlock_irq(&eq->lock); -} - -static int -create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) -{ - struct mlx5_eq_param param = {}; - int err; - - spin_lock_init(&eq->lock); - INIT_WORK(&eq->work, eq_pf_action); - - eq->pool = mempool_create_kmalloc_pool(MLX5_NUM_PF_DRAIN, - sizeof(struct mlx5_pagefault)); - if (!eq->pool) - return -ENOMEM; - - eq->wq = alloc_workqueue("mlx5_page_fault", - WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, - MLX5_NUM_CMD_EQE); - if (!eq->wq) { - err = -ENOMEM; - goto err_mempool; - } - - param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PFAULT_IDX, - .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - .nent = MLX5_NUM_ASYNC_EQE, - .context = eq, - .handler = mlx5_eq_pf_int - }; - - eq->core = mlx5_eq_create_generic(dev, "mlx5_page_fault_eq", ¶m); - if (IS_ERR(eq->core)) { - err = PTR_ERR(eq->core); - goto err_wq; - } - - return 0; -err_wq: - destroy_workqueue(eq->wq); -err_mempool: - mempool_destroy(eq->pool); - return err; -} - -static int destroy_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) -{ - int err; - - err = mlx5_eq_destroy_generic(dev, eq->core); - cancel_work_sync(&eq->work); - destroy_workqueue(eq->wq); - mempool_destroy(eq->pool); - - return err; -} - -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, error, !!error); - MLX5_SET(page_fault_resume_in, in, page_fault_type, type); - MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); - MLX5_SET(page_fault_resume_in, in, token, token); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} -EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - static void general_event_handler(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { @@ -1016,22 +788,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) goto err2; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = create_pf_eq(dev, &table->pfault_eq); - if (err) { - mlx5_core_warn(dev, "failed to create page fault EQ %d\n", - err); - goto err3; - } - } - - return err; -err3: - destroy_async_eq(dev, &table->pages_eq); -#else return err; -#endif err2: destroy_async_eq(dev, &table->async_eq); @@ -1047,15 +804,6 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = destroy_pf_eq(dev, &table->pfault_eq); - if (err) - mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", - err); - } -#endif - err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index db32057ad054..4cc2d442cef6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -39,14 
+39,6 @@ struct mlx5_eq_comp { struct list_head list; }; -struct mlx5_eq_pagefault { - struct mlx5_eq *core; - struct work_struct work; - spinlock_t lock; /* Pagefaults spinlock */ - struct workqueue_struct *wq; - mempool_t *pool; -}; - int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3de83fe65f2b..91022f141855 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1169,14 +1169,6 @@ static int init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&priv->waiting_events_list); priv->is_accum_events = false; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - err = init_srcu_struct(&priv->pfault_srcu); - if (err) { - dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", - err); - goto clean_dev; - } -#endif mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); @@ -1185,7 +1177,7 @@ static int init_one(struct pci_dev *pdev, err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_srcu; + goto clean_dev; } err = mlx5_health_init(dev); @@ -1218,11 +1210,7 @@ clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); -clean_srcu: -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: -#endif devlink_free(devlink); return err; @@ -1246,9 +1234,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); -#endif devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 4728b027cb9e..21727d9eeb84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -100,8 +100,6 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fe9b552aa649..f41e6713df10 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -510,7 +510,6 @@ struct mlx5_fc_stats { struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; -struct mlx5_pagefault; struct mlx5_eq_table; struct mlx5_rate_limit { @@ -619,13 +618,6 @@ struct mlx5_priv { struct mlx5_port_module_event_stats pme_stats; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - void (*pfault)(struct mlx5_core_dev *dev, - void *context, - struct mlx5_pagefault *pfault); - void *pfault_ctx; - struct srcu_struct pfault_srcu; -#endif struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; }; @@ -650,44 +642,6 @@ enum mlx5_pagefault_type_flags { MLX5_PFAULT_RDMA = 1 << 2, }; -/* Contains the details of a pagefault. 
*/ -struct mlx5_pagefault { - u32 bytes_committed; - u32 token; - u8 event_subtype; - u8 type; - union { - /* Initiator or send message responder pagefault details. */ - struct { - /* Received packet size, only valid for responders. */ - u32 packet_size; - /* - * Number of resource holding WQE, depends on type. - */ - u32 wq_num; - /* - * WQE index. Refers to either the send queue or - * receive queue, according to event_subtype. - */ - u16 wqe_index; - } wqe; - /* RDMA responder pagefault details */ - struct { - u32 r_key; - /* - * Received packet size, minimal size page fault - * resolution required for forward progress. - */ - u32 packet_size; - u32 rdma_op_len; - u64 rdma_va; - } rdma; - }; - - struct mlx5_eq_pagefault *eq; - struct work_struct work; -}; - struct mlx5_td { struct list_head tirs_list; u32 tdn; @@ -1118,9 +1072,6 @@ struct mlx5_interface { void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); - void (*pfault)(struct mlx5_core_dev *dev, - void *context, - struct mlx5_pagefault *pfault); void * (*get_dev)(void *context); int protocol; struct list_head list; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index c733673ba5f6..71d82c5a1a02 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -17,6 +17,10 @@ enum { MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, }; +#define MLX5_NUM_CMD_EQE (32) +#define MLX5_NUM_ASYNC_EQE (0x1000) +#define MLX5_NUM_SPARE_EQE (0x80) + struct mlx5_eq; struct mlx5_eq_param { @@ -36,4 +40,21 @@ mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); +/* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create EQs with MLX5_NUM_SPARE_EQE extra entries, + * so we must update our consumer index at + * least that often. + * + * mlx5_eq_update_cc must be called on every EQE @EQ irq handler + */ +static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc) +{ + if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) { + mlx5_eq_update_ci(eq, cc, 0); + cc = 0; + } + return cc; +} + #endif /* MLX5_CORE_EQ_H */ -- cgit v1.2.3 From 6d2d6fc83a281d51863fb5d08b59333ed1b46cc1 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:42 -0800 Subject: net/mlx5: EQ, Make EQE access methods inline These are one/two liner generic EQ access methods, better have them declared static inline in eq.h. 
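As a reading aid for next_eqe_sw() below: nent is a power of two, so the bit (cons_index & nent) flips each time the consumer wraps around the queue, and an EQE is software-owned exactly when its owner bit equals that lap parity. A sketch restating the test (process() is an illustrative placeholder):

	/* e.g. nent = 4: cons_index 0..3 -> parity 0, 4..7 -> parity 1, ... */
	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));

	if (((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) == 0)
		process(eqe);	/* hardware has published this entry */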
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 23 ---------------------- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 25 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 895401609c63..6ba8e401a0c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -46,7 +46,6 @@ #include "diag/fw_tracer.h" enum { - MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), MLX5_EQE_OWNER_INIT_VAL = 0x1, }; @@ -103,18 +102,6 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) -{ - return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); -} - -static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) -{ - struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); - - return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; -} - static const char *eqe_type_str(u8 type) { switch (type) { @@ -202,16 +189,6 @@ static enum mlx5_dev_event port_subtype_event(u8 subtype) return -1; } -static void eq_update_ci(struct mlx5_eq *eq, int arm) -{ - __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); - u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); - - __raw_writel((__force u32)cpu_to_be32(val), addr); - /* We still want ordering, just not swabbing, so add a barrier */ - mb(); -} - static void general_event_handler(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 4cc2d442cef6..6d8c8a57d52b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -5,7 +5,8 @@ #define __LIB_MLX5_EQ_H__ #include -#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { struct list_head list; @@ -39,6 +40,28 @@ struct mlx5_eq_comp { struct list_head list; }; +static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static inline void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 0f597ed435b9ea1296e25474b762bedceba97a50 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:18 -0800 Subject: net/mlx5: EQ, Introduce atomic notifier chain subscription API Use atomic_notifier_chain to fire firmware events at internal mlx5 core components such as eswitch/fpga/clock/FW tracer/etc.., this is to avoid explicit calls from low level mlx5_core to upper components and to simplify the mlx5_core API for future developments. 
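Handlers subscribed this way use the standard atomic notifier signature; a minimal sketch of the callback shape assumed by the example below (name and body are illustrative, the real handlers are converted in the follow-up patches):

	static int port_event_handler(struct notifier_block *nb,
				      unsigned long type, void *data)
	{
		struct mlx5_eqe *eqe = data;

		/* type carries the mlx5_event value, data the raw EQE */
		return NOTIFY_OK;
	}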
Simply provide register/unregister notifiers API and call the notifier chain on firmware async events. Example: to subscribe to a FW event: struct mlx5_nb port_event; MLX5_NB_INIT(&port_event, port_event_handler, PORT_CHANGE); mlx5_eq_notifier_register(mdev, &port_event); where: - port_event_handler is the notifier block callback. - PORT_CHANGE is the suffix of MLX5_EVENT_TYPE_PORT_CHANGE. The above will guarantee that port_event_handler will receive all FW events of the type MLX5_EVENT_TYPE_PORT_CHANGE. To receive all FW/HW events one can subscribe to MLX5_EVENT_TYPE_NOTIFY_ANY. The next few patches will start moving all mlx5 core components to use this new API and cleanup mlx5_eq_async_int msix handler from component explicit calls and specific logic. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 42 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 5 +++ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++ include/linux/mlx5/device.h | 10 +++++- include/linux/mlx5/eq.h | 16 +++++++-- 5 files changed, 72 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6ba8e401a0c7..34e4b2c246ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -68,8 +69,10 @@ struct mlx5_irq_info { struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq pages_eq; - struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; + struct mlx5_eq async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; struct mutex lock; /* sync async eqs creations */ int num_comp_vectors; @@ -316,13 +319,17 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; u32 cqn = -1; u32 rsn; u8 port; + dev = eq->dev; + eqt = dev->priv.eq_table; + while ((eqe = next_eqe_sw(eq))) { /* * Make sure we read EQ entry contents after we've @@ -437,6 +444,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) break; } + if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + else + mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); + + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); + ++eq->cons_index; ++set_ci; @@ -625,7 +639,7 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) int mlx5_eq_table_init(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table; - int err; + int i, err; eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); if (!eq_table) @@ -638,6 +652,8 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) goto kvfree_eq_table; mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); return 0; @@ -1202,3 +1218,23 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) destroy_async_eqs(dev); free_irq_vectors(dev); } + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} + +int 
mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 6d8c8a57d52b..c0fb6d72b695 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -4,6 +4,8 @@ #ifndef __LIB_MLX5_EQ_H__ #define __LIB_MLX5_EQ_H__ #include +#include +#include #define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) @@ -90,4 +92,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 21727d9eeb84..e06c6e16ffc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -78,6 +78,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_info(__dev, format, ...) \ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e326524bafcc..f7c8bebfe472 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -301,9 +301,15 @@ enum { MLX5_EVENT_QUEUE_TYPE_DCT = 6, }; +/* mlx5 components can subscribe to any one of these events via + * mlx5_eq_notifier_register API. 
+ */ enum mlx5_event { + /* Special value to subscribe to any event */ + MLX5_EVENT_TYPE_NOTIFY_ANY = 0x0, + /* HW events enum start: comp events are not subscribable */ MLX5_EVENT_TYPE_COMP = 0x0, - + /* HW Async events enum start: subscribable events */ MLX5_EVENT_TYPE_PATH_MIG = 0x01, MLX5_EVENT_TYPE_COMM_EST = 0x02, MLX5_EVENT_TYPE_SQ_DRAINED = 0x03, @@ -341,6 +347,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21, MLX5_EVENT_TYPE_DEVICE_TRACER = 0x26, + + MLX5_EVENT_TYPE_MAX = MLX5_EVENT_TYPE_DEVICE_TRACER + 1, }; enum { diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 71d82c5a1a02..00045cc4ea11 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,8 +4,6 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -#include - enum { MLX5_EQ_PAGEREQ_IDX = 0, MLX5_EQ_CMD_IDX = 1, @@ -22,6 +20,7 @@ enum { #define MLX5_NUM_SPARE_EQE (0x80) struct mlx5_eq; +struct mlx5_core_dev; struct mlx5_eq_param { u8 index; @@ -57,4 +56,17 @@ static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc) return cc; } +struct mlx5_nb { + struct notifier_block nb; + u8 event_type; +}; + +#define mlx5_nb_cof(ptr, type, member) \ + (container_of(container_of(ptr, struct mlx5_nb, nb), type, member)) + +#define MLX5_NB_INIT(name, handler, event) do { \ + (name)->nb.notifier_call = handler; \ + (name)->event_type = MLX5_EVENT_TYPE_##event; \ +} while (0) + #endif /* MLX5_CORE_EQ_H */ -- cgit v1.2.3 From 720a936d407709dcbcf714293112667677d8c967 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:19 -0800 Subject: net/mlx5: FWTrace, Use async events chain Remove the explicit call to mlx5_fw_tracer_event on MLX5_EVENT_TYPE_DEVICE_TRACER and let the fw tracer register its own handler when it is ready. Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 27 ++++++++++++---------- .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 ---- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index d4ec93bde4de..6999f4486e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -30,6 +30,7 @@ * SOFTWARE. */ #define CREATE_TRACE_POINTS +#include "lib/eq.h" #include "fw_tracer.h" #include "fw_tracer_tracepoint.h" @@ -846,9 +847,9 @@ free_tracer: return ERR_PTR(err); } -/* Create HW resources + start tracer - * must be called before Async EQ is created - */ +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data); + +/* Create HW resources + start tracer */ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev; @@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) goto err_dealloc_pd; } + MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); + mlx5_eq_notifier_register(dev, &tracer->nb); + mlx5_fw_tracer_start(tracer); return 0; @@ -883,9 +887,7 @@ err_dealloc_pd: return err; } -/* Stop tracer + Cleanup HW resources - * must be called after Async EQ is destroyed - */ +/* Stop tracer + Cleanup HW resources */ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) { if (IS_ERR_OR_NULL(tracer)) @@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? 
(%d)\n", tracer->owner); - + mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); cancel_work_sync(&tracer->ownership_change_work); cancel_work_sync(&tracer->handle_traces_work); @@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) kfree(tracer); } -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) { - struct mlx5_fw_tracer *tracer = dev->tracer; - - if (!tracer) - return; + struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); + struct mlx5_core_dev *dev = tracer->dev; + struct mlx5_eqe *eqe = data; switch (eqe->sub_type) { case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: @@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", eqe->sub_type); } + + return NOTIFY_OK; } EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 0347f2dd5cee..a8b8747f2b61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -55,6 +55,7 @@ struct mlx5_fw_tracer { struct mlx5_core_dev *dev; + struct mlx5_nb nb; bool owner; u8 trc_ver; struct workqueue_struct *work_queue; @@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 34e4b2c246ff..c7c436b0ed2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -434,10 +434,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) general_event_handler(dev, eqe); break; - case MLX5_EVENT_TYPE_DEVICE_TRACER: - mlx5_fw_tracer_event(dev, eqe); - break; - default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); -- cgit v1.2.3 From a52a7d01fde117bc6d57602e2e7e947037c865b0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:20 -0800 Subject: net/mlx5: FPGA, Use async events chain Remove the explicit call to mlx5_fpga_event on MLX5_EVENT_TYPE_FPGA_ERROR or MLX5_EVENT_TYPE_FPGA_QP_ERROR let fpga core to register its own handler when its ready. 
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 5 --- .../net/ethernet/mellanox/mlx5/core/fpga/core.c | 38 +++++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/fpga/core.h | 11 +++---- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c7c436b0ed2e..8aabd23d2166 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -421,11 +421,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_pps_event(dev, eqe); break; - case MLX5_EVENT_TYPE_FPGA_ERROR: - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); - break; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: mlx5_temp_warning_event(dev, eqe); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 436a8136f26f..27c5f6c7d36a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "lib/mlx5.h" +#include "lib/eq.h" #include "fpga/core.h" #include "fpga/conn.h" @@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) return 0; } +static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); + +static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; @@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; + MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); + MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); + err = mlx5_fpga_conn_device_init(fdev); if (err) goto err_rsvd_gid; @@ -201,6 +223,8 @@ err_conn_init: mlx5_fpga_conn_device_cleanup(fdev); err_rsvd_gid: + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); mlx5_core_unreserve_gids(mdev, max_num_qps); out: spin_lock_irqsave(&fdev->state_lock, flags); @@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) } mlx5_fpga_conn_device_cleanup(fdev); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); mlx5_core_unreserve_gids(mdev, max_num_qps); } @@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) return "Unknown"; } -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) +static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, + unsigned long event, void *eqe) { - struct mlx5_fpga_device *fdev = mdev->fpga; + void *data = ((struct mlx5_eqe *)eqe)->data.raw; const char *event_name; bool teardown = false; unsigned long flags; @@ -303,9 +331,7 @@ void 
mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn); break; default: - mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n", - event); - return; + return NOTIFY_DONE; } spin_lock_irqsave(&fdev->state_lock, flags); @@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) */ if (teardown) mlx5_trigger_health_work(fdev->mdev); + + return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 3e2355c8df3f..7e2e871dbf83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -35,11 +35,16 @@ #ifdef CONFIG_MLX5_FPGA +#include + +#include "lib/eq.h" #include "fpga/cmd.h" /* Represents an Innova device */ struct mlx5_fpga_device { struct mlx5_core_dev *mdev; + struct mlx5_nb fpga_err_nb; + struct mlx5_nb fpga_qp_err_nb; spinlock_t state_lock; /* Protects state transitions */ enum mlx5_fpga_status state; enum mlx5_fpga_image last_admin_image; @@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev); void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); #else @@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) { } -static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, - void *data) -{ -} - #endif #endif /* __MLX5_FPGA_CORE_H__ */ -- cgit v1.2.3 From 41069256e93045a45a2c359c9715439be0b47bf4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:21 -0800 Subject: net/mlx5: Clock, Use async events chain Remove the explicit call to mlx5_pps_event on MLX5_EVENT_TYPE_PPS_EVENT and let the clock logic register its own handler when it is ready.
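A note on the return-value convention these handlers follow: NOTIFY_OK means the event was recognized and handled, NOTIFY_DONE means this consumer does not care about it; neither stops the chain, so several consumers may observe the same EQE. A minimal hypothetical handler:

    static int my_pps_event(struct notifier_block *nb, unsigned long type, void *data)
    {
            struct mlx5_eqe *eqe = data;

            if (type != MLX5_EVENT_TYPE_PPS_EVENT)
                    return NOTIFY_DONE;     /* not ours, stay silent */

            /* ... act on eqe->data.pps, e.g. eqe->data.pps.pin ... */
            return NOTIFY_OK;
    }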
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 ---- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 24 +++++++++++++++------- .../net/ethernet/mellanox/mlx5/core/lib/clock.h | 3 --- include/linux/mlx5/driver.h | 4 +++- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8aabd23d2166..e5fcce9ca107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -417,10 +417,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_port_module_event(dev, eqe); break; - case MLX5_EVENT_TYPE_PPS_EVENT: - mlx5_pps_event(dev, eqe); - break; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: mlx5_temp_warning_event(dev, eqe); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0d90b1b4a3d3..d27c239e7d6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -33,6 +33,7 @@ #include #include #include +#include "lib/eq.h" #include "en.h" #include "clock.h" @@ -439,16 +440,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } -void mlx5_pps_event(struct mlx5_core_dev *mdev, - struct mlx5_eqe *eqe) +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; - struct timespec64 ts; - u64 nsec_now, nsec_delta; u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta, ns; + struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - s64 ns; + struct timespec64 ts; unsigned long flags; switch (clock->ptp_info.pin_config[pin].func) { @@ -463,6 +465,7 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, } else { ptp_event.type = PTP_CLOCK_EXTTS; } + /* TODO: clock->ptp can be NULL if ptp_clock_register fails */ ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: @@ -481,8 +484,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, write_sequnlock_irqrestore(&clock->lock, flags); break; default: - mlx5_core_err(mdev, " Unhandled event\n"); + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); } + + return NOTIFY_OK; } void mlx5_init_clock(struct mlx5_core_dev *mdev) @@ -567,6 +573,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) PTR_ERR(clock->ptp)); clock->ptp = NULL; } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); } void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) { @@ -576,6 +585,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) return; + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index 263cb6e2aeee..31600924bdc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -36,7 +36,6 @@ #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) void mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev
*mdev); -void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { @@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, #else static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} -static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {} - static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f41e6713df10..99a23db9a929 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -671,6 +672,8 @@ struct mlx5_pps { }; struct mlx5_clock { + struct mlx5_core_dev *mdev; + struct mlx5_nb pps_nb; seqlock_t lock; struct cyclecounter cycles; struct timecounter tc; @@ -678,7 +681,6 @@ struct mlx5_clock { u32 nominal_c_mult; unsigned long overflow_period; struct delayed_work overflow_work; - struct mlx5_core_dev *mdev; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; struct mlx5_pps pps_info; -- cgit v1.2.3 From 6933a93795590b51a3056b6d66562ca1da1557ae Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:22 -0800 Subject: net/mlx5: E-Switch, Use async events chain Remove the explicit call to mlx5_eswitch_vport_event on MLX5_EVENT_TYPE_NIC_VPORT_CHANGE and let the eswitch register its own handler when it is ready. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 44 +++++++++++++---------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index e5fcce9ca107..7c8b2d89645b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -409,10 +409,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) } break; - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); - break; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2346b6ba3d54..e6a9b19d8626 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" #include "lib/eq.h" @@ -1568,7 +1569,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - mlx5_eq_synchronize_async_irq(esw->dev); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1594,10 +1594,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) mutex_unlock(&esw->state_lock); } +static int eswitch_vport_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb); + struct mlx5_eqe *eqe = data; + struct mlx5_vport *vport; + u16 vport_num; + + vport_num =
be16_to_cpu(eqe->data.vport_change.vport_num); + vport = &esw->vports[vport_num]; + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + + return NOTIFY_OK; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) - int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; @@ -1641,6 +1656,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) for (i = 0; i <= nvfs; i++) esw_enable_vport(esw, i, enabled_events); + if (mode == SRIOV_LEGACY) { + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + } + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); return 0; @@ -1670,6 +1690,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) mc_promisc = &esw->mc_promisc; nvports = esw->enabled_vports; + if (esw->mode == SRIOV_LEGACY) + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1778,23 +1801,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) kfree(esw); } -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) -{ - struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; - u16 vport_num = be16_to_cpu(vc_eqe->vport_num); - struct mlx5_vport *vport; - - if (!esw) { - pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", - vport_num); - return; - } - - vport = &esw->vports[vport_num]; - if (vport->enabled) - queue_work(esw->work_queue, &vport->vport_change_handler); -} - /* Vport Administration */ #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index aaafc9f17115..480ffa294867 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -181,6 +181,7 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_eswitch { struct mlx5_core_dev *dev; + struct mlx5_nb nb; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -211,7 +212,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, @@ -352,7 +352,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} -- cgit v1.2.3 From 0cf53c1247565b339a23d82a1853a0c41e9a2a34 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:23 -0800 Subject: net/mlx5: FWPage, Use async events chain Remove the explicit call to mlx5_core_req_pages_handler on 
MLX5_EVENT_TYPE_PAGE_REQUEST and let the FW page logic register its own handler when it is ready. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 11 ------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 27 +++++++------ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 44 +++++++++++++++------- include/linux/mlx5/driver.h | 5 ++- 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7c8b2d89645b..7f6a644700eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -398,17 +398,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_eq_cq_event(eq, cqn, eqe->type); break; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - { - u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); - s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); - - mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", - func_id, npages); - mlx5_core_req_pages_handler(dev, func_id, npages); - } - break; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 91022f141855..9e4cd2757ea8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -916,16 +916,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = mlx5_pagealloc_start(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto reclaim_boot_pages; - } - err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { dev_err(&pdev->dev, "init hca failed\n"); - goto err_pagealloc_stop; + goto reclaim_boot_pages; } mlx5_set_driver_version(dev); @@ -953,6 +947,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_get_uars; } + mlx5_pagealloc_start(dev); + err = mlx5_eq_table_create(dev); if (err) { dev_err(&pdev->dev, "Failed to create EQs\n"); @@ -1039,6 +1035,7 @@ err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_pagealloc_stop(dev); mlx5_put_uars_page(dev, priv->uar); err_get_uars: @@ -1052,9 +1049,6 @@ err_stop_poll: goto out_err; } -err_pagealloc_stop: - mlx5_pagealloc_stop(dev); - reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); @@ -1100,16 +1094,18 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_pagealloc_stop(dev); mlx5_put_uars_page(dev, priv->uar); + if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); + err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out; } - mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); mlx5_cmd_cleanup(dev); @@ -1186,12 +1182,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - mlx5_pagealloc_init(dev); + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); - goto clean_health; + goto err_load_one; } request_module_nowait(MLX5_IB_MOD); @@ -1205,8 +1203,9 @@ static int init_one(struct pci_dev *pdev, clean_load: mlx5_unload_one(dev, priv, true); -clean_health:
mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index e36d3e3675f9..a83b517b0714 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -37,6 +37,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages) +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) { struct mlx5_pages_req *req; - + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", + func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); if (!req) { mlx5_core_warn(dev, "failed to allocate pages request\n"); - return; + return NOTIFY_DONE; } req->dev = dev; @@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, req->npages = npages; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; } int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) @@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) return 0; } -void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) { dev->priv.page_root = RB_ROOT; INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { - /* nothing */ + destroy_workqueue(dev->priv.pg_wq); } -int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) { - dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); - if (!dev->priv.pg_wq) - return -ENOMEM; - - return 0; + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); } void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { - destroy_workqueue(dev->priv.pg_wq); + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); } int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 99a23db9a929..61088ad33500 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -564,6 +564,7 @@ struct mlx5_priv { struct mlx5_eq_table *eq_table; /* pages stuff */ + struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; struct rb_root page_root; int fw_pages; @@ -962,9 +963,9 @@ int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); -void mlx5_pagealloc_init(struct mlx5_core_dev *dev); +int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct 
mlx5_core_dev *dev); -int mlx5_pagealloc_start(struct mlx5_core_dev *dev); +void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); -- cgit v1.2.3 From 71edc69ca1a78ce18411a540c550a4ef1eb017cd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:24 -0800 Subject: net/mlx5: CmdIF, Use async events chain Remove the explicit call to mlx5_cmd_comp_handler on MLX5_EVENT_TYPE_CMD and let the command interface register its own handler when it is ready. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 48 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 -- drivers/net/ethernet/mellanox/mlx5/core/health.c | 25 +---------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- include/linux/mlx5/driver.h | 2 + 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 7b18aff955f1..8ab636d59edb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -40,9 +40,11 @@ #include #include #include +#include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { CMD_IF_REV = 5, @@ -805,6 +807,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in) return MLX5_GET(mbox_in, in->first.data, opcode); } +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + static void cb_timeout_handler(struct work_struct *work) { struct delayed_work *dwork = container_of(work, struct delayed_work, @@ -1412,14 +1416,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) up(&cmd->sem); } +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); } void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1435,7 +1457,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1533,7 +1555,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) } } } -EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; + + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector
0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} static int status_to_err(u8 status) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7f6a644700eb..b28869aa1a4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -368,10 +368,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_srq_event(dev, rsn, eqe->type); break; - case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); - break; - case MLX5_EVENT_TYPE_PORT_CHANGE: port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 066883003aea..4e42bd290959 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -79,29 +79,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static void trigger_cmd_completions(struct mlx5_core_dev *dev) -{ - unsigned long flags; - u64 vector; - - /* wait for pending handlers to complete */ - mlx5_eq_synchronize_cmd_irq(dev); - spin_lock_irqsave(&dev->cmd.alloc_lock, flags); - vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); - if (!vector) - goto no_trig; - - vector |= MLX5_TRIGGERED_CMD_COMP; - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); - - mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector, true); - return; - -no_trig: - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); -} - static int in_fatal(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -125,7 +102,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - trigger_cmd_completions(dev); + mlx5_cmd_trigger_completions(dev); } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index e06c6e16ffc9..5dd453e47a04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -127,7 +127,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 61088ad33500..a8d638134fc8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -278,6 +278,8 @@ struct mlx5_cmd_stats { }; struct mlx5_cmd { + struct mlx5_nb nb; + void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; -- cgit v1.2.3 From 221c14f3d12489ced0f2ca8b31b2221c5dbbf145 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:25 -0800 Subject: net/mlx5: Resource tables, Use async events chain Remove the explicit call to QP/SRQ resources events handlers on several FW events 
and let resources logic register resources events notifiers via the new API. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 29 ------------ drivers/net/ethernet/mellanox/mlx5/core/qp.c | 68 ++++++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 55 +++++++++++++++++++--- include/linux/mlx5/driver.h | 6 ++- 4 files changed, 108 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index b28869aa1a4e..0cf448575ebd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -324,7 +324,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_eqe *eqe; int set_ci = 0; u32 cqn = -1; - u32 rsn; u8 port; dev = eq->dev; @@ -340,34 +339,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); switch (eqe->type) { - case MLX5_EVENT_TYPE_DCT_DRAINED: - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PATH_MIG: - case MLX5_EVENT_TYPE_COMM_EST: - case MLX5_EVENT_TYPE_SQ_DRAINED: - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); - mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_srq_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PORT_CHANGE: port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index cba4a435043a..28726c63101f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -38,11 +38,11 @@ #include #include "mlx5_core.h" +#include "lib/eq.h" -static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, - u32 rsn) +static struct mlx5_core_rsc_common * +mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { - struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; spin_lock(&table->lock); @@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, spin_unlock(&table->lock); - if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", - rsn); - return NULL; - } return common; } @@ -120,14 +115,52 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) +static int rsc_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + struct mlx5_core_rsc_common *common; + struct mlx5_qp_table *table; + struct mlx5_core_dev *dev; struct mlx5_core_dct *dct; + u8 event_type = (u8)type; struct mlx5_core_qp *qp; + struct mlx5_priv *priv; + struct 
mlx5_eqe *eqe; + u32 rsn; + + switch (event_type) { + case MLX5_EVENT_TYPE_DCT_DRAINED: + eqe = data; + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + break; + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + eqe = data; + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); + break; + default: + return NOTIFY_DONE; + } + + table = mlx5_nb_cof(nb, struct mlx5_qp_table, nb); + priv = container_of(table, struct mlx5_priv, qp_table); + dev = container_of(priv, struct mlx5_core_dev, priv); - if (!common) - return; + mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn); + + common = mlx5_get_rsc(table, rsn); + if (!common) { + mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + return NOTIFY_OK; + } if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", @@ -152,6 +185,8 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) } out: mlx5_core_put_rsc(common); + + return NOTIFY_OK; } static int create_resource_common(struct mlx5_core_dev *dev, @@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); + + MLX5_NB_INIT(&table->nb, rsc_event_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) { + struct mlx5_qp_table *table = &dev->priv.qp_table; + + mlx5_eq_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } @@ -676,8 +717,9 @@ struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, enum mlx5_res_type res_type) { u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + struct mlx5_qp_table *table = &dev->priv.qp_table; - return mlx5_get_rsc(dev, rsn); + return mlx5_get_rsc(table, rsn); } EXPORT_SYMBOL_GPL(mlx5_core_res_hold); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 6a6fc9be01e6..0563866c13f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -36,13 +36,25 @@ #include #include #include -#include "mlx5_core.h" #include +#include "mlx5_core.h" +#include "lib/eq.h" -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) +static int srq_event_notifier(struct mlx5_srq_table *table, + unsigned long type, void *data) { - struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_dev *dev; struct mlx5_core_srq *srq; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u32 srqn; + + priv = container_of(table, struct mlx5_priv, srq_table); + dev = container_of(priv, struct mlx5_core_dev, priv); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + mlx5_core_dbg(dev, "SRQ event (%d): srqn 0x%x\n", eqe->type, srqn); spin_lock(&table->lock); @@ -54,13 +66,35 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) if (!srq) { mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); - return; + return NOTIFY_OK; } - srq->event(srq, event_type); + srq->event(srq, eqe->type); if 
(atomic_dec_and_test(&srq->refcount)) complete(&srq->free); + + return NOTIFY_OK; +} + +static int catas_err_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + + table = mlx5_nb_cof(nb, struct mlx5_srq_table, catas_err_nb); + /* type == MLX5_EVENT_TYPE_SRQ_CATAS_ERROR */ + return srq_event_notifier(table, type, data); +} + +static int rq_limit_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + + table = mlx5_nb_cof(nb, struct mlx5_srq_table, rq_limit_nb); + /* type == MLX5_EVENT_TYPE_SRQ_RQ_LIMIT */ + return srq_event_notifier(table, type, data); } static int get_pas_size(struct mlx5_srq_attr *in) @@ -708,9 +742,18 @@ void mlx5_init_srq_table(struct mlx5_core_dev *dev) memset(table, 0, sizeof(*table)); spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + MLX5_NB_INIT(&table->catas_err_nb, catas_err_notifier, SRQ_CATAS_ERROR); + mlx5_eq_notifier_register(dev, &table->catas_err_nb); + + MLX5_NB_INIT(&table->rq_limit_nb, rq_limit_notifier, SRQ_RQ_LIMIT); + mlx5_eq_notifier_register(dev, &table->rq_limit_nb); } void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) { - /* nothing */ + struct mlx5_srq_table *table = &dev->priv.srq_table; + + mlx5_eq_notifier_unregister(dev, &table->rq_limit_nb); + mlx5_eq_notifier_unregister(dev, &table->catas_err_nb); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a8d638134fc8..afba0864f45c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -464,6 +464,8 @@ struct mlx5_core_health { }; struct mlx5_qp_table { + struct mlx5_nb nb; + /* protect radix tree */ spinlock_t lock; @@ -471,6 +473,8 @@ struct mlx5_qp_table { }; struct mlx5_srq_table { + struct mlx5_nb catas_err_nb; + struct mlx5_nb rq_limit_nb; /* protect radix tree */ spinlock_t lock; @@ -978,8 +982,6 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); -- cgit v1.2.3 From 2742bc90bc102644d67e17758e234646d91ae696 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:26 -0800 Subject: net/mlx5: CQ ERR, Use async events chain Remove the explicit call to mlx5_eq_cq_event on MLX5_EVENT_TYPE_CQ_ERROR and register a specific CQ ERROR handler via the new API. 
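As with the QP/SRQ handlers converted earlier in the series, the handler decodes a 24-bit object number from a big-endian EQE field and must tolerate a CQ that has already gone away. A sketch of that shared convention (field and helper names as in the diff below):

    struct mlx5_eqe *eqe = data;
    u32 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; /* CQNs are 24 bits wide */

    /* A failed mlx5_eq_cq_get() lookup is not fatal: warn about the
     * bogus CQN and still return NOTIFY_OK so the chain keeps running. */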
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 66 ++++++++++++++++++---------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0cf448575ebd..4e3febbf639d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -74,6 +74,9 @@ struct mlx5_eq_table { struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; + /* Since CQ DB is stored in async_eq */ + struct mlx5_nb cq_err_nb; + struct mutex lock; /* sync async eqs creations */ int num_comp_vectors; struct mlx5_irq_info *irq_info; @@ -235,20 +238,6 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) -{ - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); - return; - } - - cq->event(cq, event_type); - - mlx5_cq_put(cq); -} - static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) { struct mlx5_eq_comp *eq_comp = eq_ptr; @@ -323,7 +312,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; - u32 cqn = -1; u8 port; dev = eq->dev; @@ -358,12 +346,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) port, eqe->sub_type); } break; - case MLX5_EVENT_TYPE_CQ_ERROR: - cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; - mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", - cqn, eqe->data.cq_err.syndrome); - mlx5_eq_cq_event(eq, cqn, eqe->type); - break; case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); @@ -639,6 +621,38 @@ static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) return err; } +static int cq_err_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eq_table *eqt; + struct mlx5_core_cq *cq; + struct mlx5_eqe *eqe; + struct mlx5_eq *eq; + u32 cqn; + + /* type == MLX5_EVENT_TYPE_CQ_ERROR */ + + eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); + eq = &eqt->async_eq; + eqe = data; + + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + + cq = mlx5_eq_cq_get(eq, cqn); + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return NOTIFY_OK; + } + + cq->event(cq, type); + + mlx5_cq_put(cq); + + return NOTIFY_OK; +} + static u64 gather_async_events_mask(struct mlx5_core_dev *dev) { u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; @@ -679,6 +693,9 @@ static int create_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_param param = {}; int err; + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); + mlx5_eq_notifier_register(dev, &table->cq_err_nb); + param = (struct mlx5_eq_param) { .index = MLX5_EQ_CMD_IDX, .mask = 1ull << MLX5_EVENT_TYPE_CMD, @@ -689,7 +706,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - return err; + goto err0; } mlx5_cmd_use_events(dev); @@ -728,6 +745,8 @@ err2: err1: mlx5_cmd_use_polling(dev); destroy_async_eq(dev, &table->cmd_eq); +err0: + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } @@ -745,12 +764,15 @@ static void 
destroy_async_eqs(struct mlx5_core_dev *dev) if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); + mlx5_cmd_use_polling(dev); err = destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); + + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) -- cgit v1.2.3 From 69c1280b1f3b9123bc5154b2062507abcc14c3ef Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:27 -0800 Subject: net/mlx5: Device events, Use async events chain Move all generic async event handling into a new dedicated file, events.c, keeping eq.c clean of concrete event handling logic. Use the new API to register for NOTIFY_ANY, handle the generic events there, and dispatch the allowed events to the mlx5_core consumers (mlx5_ib and mlx5e). Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 157 ------------ drivers/net/ethernet/mellanox/mlx5/core/events.c | 283 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 34 +++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 16 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 6 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 57 ----- include/linux/mlx5/driver.h | 29 +-- include/linux/mlx5/port.h | 3 - 10 files changed, 344 insertions(+), 252 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/events.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d324a3884462..26afe0779a0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ diag/fs_tracepoint.o diag/fw_tracer.o # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1e55b9c27ffc..748d23806391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,6 +30,7 @@ * SOFTWARE.
*/ +#include "lib/mlx5.h" #include "en.h" #include "en_accel/ipsec.h" #include "en_accel/tls.h" @@ -1120,15 +1121,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { - struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + struct mlx5_pme_stats pme_stats; int i; + mlx5_get_pme_stats(priv->mdev, &pme_stats); + for (i = 0; i < NUM_PME_STATUS_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, mlx5e_pme_status_desc, i); for (i = 0; i < NUM_PME_ERR_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, mlx5e_pme_error_desc, i); return idx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 4e3febbf639d..4aa39a1fe23f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -108,121 +108,6 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static const char *eqe_type_str(u8 type) -{ - switch (type) { - case MLX5_EVENT_TYPE_COMP: - return "MLX5_EVENT_TYPE_COMP"; - case MLX5_EVENT_TYPE_PATH_MIG: - return "MLX5_EVENT_TYPE_PATH_MIG"; - case MLX5_EVENT_TYPE_COMM_EST: - return "MLX5_EVENT_TYPE_COMM_EST"; - case MLX5_EVENT_TYPE_SQ_DRAINED: - return "MLX5_EVENT_TYPE_SQ_DRAINED"; - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; - case MLX5_EVENT_TYPE_CQ_ERROR: - return "MLX5_EVENT_TYPE_CQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_INTERNAL_ERROR: - return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; - case MLX5_EVENT_TYPE_PORT_CHANGE: - return "MLX5_EVENT_TYPE_PORT_CHANGE"; - case MLX5_EVENT_TYPE_GPIO_EVENT: - return "MLX5_EVENT_TYPE_GPIO_EVENT"; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; - case MLX5_EVENT_TYPE_REMOTE_CONFIG: - return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; - case MLX5_EVENT_TYPE_DB_BF_CONGESTION: - return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; - case MLX5_EVENT_TYPE_STALL_EVENT: - return "MLX5_EVENT_TYPE_STALL_EVENT"; - case MLX5_EVENT_TYPE_CMD: - return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - return "MLX5_EVENT_TYPE_PAGE_REQUEST"; - case MLX5_EVENT_TYPE_PAGE_FAULT: - return "MLX5_EVENT_TYPE_PAGE_FAULT"; - case MLX5_EVENT_TYPE_PPS_EVENT: - return "MLX5_EVENT_TYPE_PPS_EVENT"; - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; - case MLX5_EVENT_TYPE_FPGA_ERROR: - return "MLX5_EVENT_TYPE_FPGA_ERROR"; - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; - case MLX5_EVENT_TYPE_GENERAL_EVENT: - return "MLX5_EVENT_TYPE_GENERAL_EVENT"; - case MLX5_EVENT_TYPE_DEVICE_TRACER: - return 
"MLX5_EVENT_TYPE_DEVICE_TRACER"; - default: - return "Unrecognized event"; - } -} - -static enum mlx5_dev_event port_subtype_event(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -static void general_event_handler(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } -} - -static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - u64 value_lsb; - u64 value_msb; - - value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); - value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - - mlx5_core_warn(dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); -} - /* caller must eventually call mlx5_cq_put on the returned cq */ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) { @@ -312,7 +197,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; - u8 port; dev = eq->dev; eqt = dev->priv.eq_table; @@ -324,47 +208,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) */ dma_rmb(); - mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", - eq->eqn, eqe_type_str(eqe->type)); - switch (eqe->type) { - case MLX5_EVENT_TYPE_PORT_CHANGE: - port = (eqe->data.port.port >> 4) & 0xf; - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - if (dev->event) - dev->event(dev, port_subtype_event(eqe->sub_type), - (unsigned long)port); - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - break; - - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - mlx5_port_module_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - mlx5_temp_warning_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_GENERAL_EVENT: - general_event_handler(dev, eqe); - break; - - default: - mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", - eqe->type, eq->eqn); - break; - } - if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 000000000000..d3ab86bd394b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include +#include "mlx5_core.h" +#include "lib/eq.h" +#include 
"lib/mlx5.h" + +struct mlx5_events { + struct mlx5_nb nb; + struct mlx5_core_dev *dev; + + /* port module evetns stats */ + struct mlx5_pme_stats pme_stats; +}; + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + default: + return "Unrecognized event"; + } +} + +static enum mlx5_dev_event port_subtype2dev(u8 subtype) +{ + switch (subtype) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + return MLX5_DEV_EVENT_PORT_DOWN; + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + return MLX5_DEV_EVENT_PORT_UP; + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + return MLX5_DEV_EVENT_PORT_INITIALIZED; + case MLX5_PORT_CHANGE_SUBTYPE_LID: + return MLX5_DEV_EVENT_LID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + return MLX5_DEV_EVENT_PKEY_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + return MLX5_DEV_EVENT_GUID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + return MLX5_DEV_EVENT_CLIENT_REREG; + } + return -1; +} + +static void temp_warning_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +{ + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); +} + +static 
const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { + "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ + "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ + "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ +}; + +static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { + "Power budget exceeded", + "Long Range for non MLNX cable", + "Bus stuck(I2C or data shorted)", + "No EEPROM/retry timeout", + "Enforce part number list", + "Unknown identifier", + "High Temperature", + "Bad or shorted cable/module", + "Unknown status", +}; + +static void port_module_event(struct mlx5_events *events, struct mlx5_eqe *eqe) +{ + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + struct mlx5_core_dev *dev = events->dev; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_num = module_event_eqe->module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_ERROR) { + events->pme_stats.status_counters[module_status - 1]++; + } else if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) + /* Unknown error type */ + error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; + events->pme_stats.error_counters[error_type]++; + } + + if (!printk_ratelimit()) + return; + + if (module_status < MLX5_MODULE_STATUS_ERROR) + mlx5_core_info(dev, + "Port module event: module %u, %s\n", + module_num, mlx5_pme_status[module_status - 1]); + + else if (module_status == MLX5_MODULE_STATUS_ERROR) + mlx5_core_info(dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, mlx5_pme_status[module_status - 1], + mlx5_pme_error[error_type]); +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* Event handler for the low level mlx5_core driver. + * This handler will process/filter _some_ events and sometimes dispatch + * the equivalent mlx5_dev_event to the HCA interfaces (mlx5_ib and mlx5e) + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. 
+ */ +static int events_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + bool dev_event_dispatch = false; + enum mlx5_dev_event dev_event; + unsigned long dev_event_data; + + struct mlx5_eqe *eqe = data; + struct mlx5_events *events; + struct mlx5_core_dev *dev; + u8 port; + + events = mlx5_nb_cof(nb, struct mlx5_events, nb); + dev = events->dev; + + mlx5_core_dbg(dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + switch (eqe->type) { + case MLX5_EVENT_TYPE_PORT_CHANGE: + port = (eqe->data.port.port >> 4) & 0xf; + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_LID: + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + dev_event = port_subtype2dev(eqe->sub_type); + dev_event_data = (unsigned long)port; + dev_event_dispatch = true; + break; + default: + mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", + port, eqe->sub_type); + } + break; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT; + dev_event_data = 0; + dev_event_dispatch = true; + break; + default: + mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } + break; + + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + port_module_event(events, eqe); + break; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + temp_warning_event(dev, eqe); + break; + default: + return NOTIFY_DONE; + } + + if (dev->event && dev_event_dispatch) + dev->event(dev, dev_event, dev_event_data); + + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + events->dev = dev; + dev->priv.events = events; + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + + MLX5_NB_INIT(&events->nb, events_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev, &events->nb); +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + + mlx5_eq_notifier_unregister(dev, &events->nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 7550b1cc8c6a..23317e328b0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -33,6 +33,8 @@ #ifndef __LIB_MLX5_H__ #define __LIB_MLX5_H__ +#include "mlx5_core.h" + void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); @@ -40,4 +42,36 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +/* TODO move to lib/events.h */ + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + 
MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_NUM = 0x3, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE, + MLX5_MODULE_EVENT_ERROR_UNKNOWN, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 9e4cd2757ea8..e56278ead4eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -735,10 +735,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } + err = mlx5_events_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize events\n"); + goto err_eq_cleanup; + } + err = mlx5_cq_debugfs_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); - goto err_eq_cleanup; + goto err_events_cleanup; } mlx5_init_qp_table(dev); @@ -801,7 +807,8 @@ err_tables_cleanup: mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - +err_events_cleanup: + mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); @@ -824,6 +831,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); + mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); } @@ -947,6 +955,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_get_uars; } + mlx5_events_start(dev); mlx5_pagealloc_start(dev); err = mlx5_eq_table_create(dev); @@ -1036,6 +1045,7 @@ err_fw_tracer: err_eq_table: mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); err_get_uars: @@ -1095,8 +1105,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); - if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 5dd453e47a04..c70bd94e18d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -105,7 +105,6 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); @@ -141,6 +140,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_events_init(struct mlx5_core_dev *dev); +void 
mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 31a9cbd85689..2b82f35f4c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); } -static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { - "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ - "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ - "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ -}; - -static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { - "Power budget exceeded", - "Long Range for non MLNX cable", - "Bus stuck(I2C or data shorted)", - "No EEPROM/retry timeout", - "Enforce part number list", - "Unknown identifier", - "High Temperature", - "Bad or shorted cable/module", - "Unknown status", -}; - -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - enum port_module_event_status_type module_status; - enum port_module_event_error_type error_type; - struct mlx5_eqe_port_module *module_event_eqe; - struct mlx5_priv *priv = &dev->priv; - u8 module_num; - - module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; - module_status = module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; - error_type = module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; - - if (module_status < MLX5_MODULE_STATUS_ERROR) { - priv->pme_stats.status_counters[module_status - 1]++; - } else if (module_status == MLX5_MODULE_STATUS_ERROR) { - if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) - /* Unknown error type */ - error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; - priv->pme_stats.error_counters[error_type]++; - } - - if (!printk_ratelimit()) - return; - - if (module_status < MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event: module %u, %s\n", - module_num, mlx5_pme_status[module_status - 1]); - - else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event[error]: module %u, %s, %s\n", - module_num, mlx5_pme_status[module_status - 1], - mlx5_pme_error[error_type]); -} - int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) { u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index afba0864f45c..ba64ecf72478 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -514,6 +514,7 @@ struct mlx5_fc_stats { unsigned long sampling_interval; /* jiffies */ }; +struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; @@ -540,31 +541,6 @@ struct mlx5_rl_table { struct mlx5_rl_entry *rl_entry; }; -enum port_module_event_status_type { - MLX5_MODULE_STATUS_PLUGGED = 0x1, - MLX5_MODULE_STATUS_UNPLUGGED = 0x2, - MLX5_MODULE_STATUS_ERROR = 0x3, - MLX5_MODULE_STATUS_NUM = 0x3, -}; - -enum port_module_event_error_type { - MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, - MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, - 
MLX5_MODULE_EVENT_ERROR_BUS_STUCK, - MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, - MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, - MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, - MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, - MLX5_MODULE_EVENT_ERROR_BAD_CABLE, - MLX5_MODULE_EVENT_ERROR_UNKNOWN, - MLX5_MODULE_EVENT_ERROR_NUM, -}; - -struct mlx5_port_module_event_stats { - u64 status_counters[MLX5_MODULE_STATUS_NUM]; - u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; -}; - struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table *eq_table; @@ -614,6 +590,7 @@ struct mlx5_priv { struct list_head waiting_events_list; bool is_accum_events; + struct mlx5_events *events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -624,8 +601,6 @@ struct mlx5_priv { struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; - struct mlx5_port_module_event_stats pme_stats; - struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 34aed6032f86..bf4bc01ffb0c 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -107,9 +107,6 @@ enum mlx5e_connector_type { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) -#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF -#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF - int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -- cgit v1.2.3 From 2c89156082b3be7a064b09dfb0bcd274609f355d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:28 -0800 Subject: net/mlx5: Improve core device events handling Register a separate handler per event type, rather than listening for all events and looking for the events to handle in a switch case. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 223 ++++++++++++++--------- 1 file changed, 136 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index d3ab86bd394b..3ad004af37d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -2,15 +2,41 @@ // Copyright (c) 2018 Mellanox Technologies #include + #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +struct mlx5_event_nb { + struct mlx5_nb nb; + void *ctx; +}; + +/* General events handlers for the low level mlx5_core driver + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. 
+ */ +static int any_notifier(struct notifier_block *, unsigned long, void *); +static int port_change(struct notifier_block *, unsigned long, void *); +static int general_event(struct notifier_block *, unsigned long, void *); +static int temp_warn(struct notifier_block *, unsigned long, void *); +static int port_module(struct notifier_block *, unsigned long, void *); + +static struct mlx5_nb events_nbs_ref[] = { + {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, + {.nb.notifier_call = port_change, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = general_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, + {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, +}; + struct mlx5_events { - struct mlx5_nb nb; struct mlx5_core_dev *dev; + struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; - /* port module evetns stats */ + /* port module events stats */ struct mlx5_pme_stats pme_stats; }; @@ -80,6 +106,19 @@ static const char *eqe_type_str(u8 type) } } +/* handles all FW events, type == eqe->type */ +static int any_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + return NOTIFY_OK; +} + static enum mlx5_dev_event port_subtype2dev(u8 subtype) { switch (subtype) { @@ -101,19 +140,92 @@ static enum mlx5_dev_event port_subtype2dev(u8 subtype) return -1; } -static void temp_warning_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +/* type == MLX5_EVENT_TYPE_PORT_CHANGE */ +static int port_change(struct notifier_block *nb, + unsigned long type, void *data) { + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_core_dev *dev = events->dev; + + bool dev_event_dispatch = false; + enum mlx5_dev_event dev_event; + unsigned long dev_event_data; + struct mlx5_eqe *eqe = data; + u8 port = (eqe->data.port.port >> 4) & 0xf; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_LID: + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + dev_event = port_subtype2dev(eqe->sub_type); + dev_event_data = (unsigned long)port; + dev_event_dispatch = true; + break; + default: + mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", + port, eqe->sub_type); + } + + if (dev->event && dev_event_dispatch) + dev->event(dev, dev_event, dev_event_data); + + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_GENERAL_EVENT */ +static int general_event(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_core_dev *dev = events->dev; + + bool dev_event_dispatch = false; + enum mlx5_dev_event dev_event; + unsigned long dev_event_data; + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT; + dev_event_data = 0; + 
dev_event_dispatch = true; + break; + default: + mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } + + if (dev->event && dev_event_dispatch) + dev->event(dev, dev_event, dev_event_data); + + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ +static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; u64 value_lsb; u64 value_msb; value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - mlx5_core_warn(dev, + mlx5_core_warn(events->dev, "High temperature on sensors with bit set %llx %llx", value_msb, value_lsb); + + return NOTIFY_OK; } +/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ @@ -132,12 +244,16 @@ static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { "Unknown status", }; -static void port_module_event(struct mlx5_events *events, struct mlx5_eqe *eqe) +/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static int port_module(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + enum port_module_event_status_type module_status; enum port_module_event_error_type error_type; struct mlx5_eqe_port_module *module_event_eqe; - struct mlx5_core_dev *dev = events->dev; u8 module_num; module_event_eqe = &eqe->data.port_module; @@ -146,7 +262,6 @@ static void port_module_event(struct mlx5_events *events, struct mlx5_eqe *eqe) PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = module_event_eqe->error_type & PORT_MODULE_EVENT_ERROR_TYPE_MASK; - if (module_status < MLX5_MODULE_STATUS_ERROR) { events->pme_stats.status_counters[module_status - 1]++; } else if (module_status == MLX5_MODULE_STATUS_ERROR) { @@ -157,18 +272,20 @@ static void port_module_event(struct mlx5_events *events, struct mlx5_eqe *eqe) } if (!printk_ratelimit()) - return; + return NOTIFY_OK; if (module_status < MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, + mlx5_core_info(events->dev, "Port module event: module %u, %s\n", module_num, mlx5_pme_status[module_status - 1]); else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, + mlx5_core_info(events->dev, "Port module event[error]: module %u, %s, %s\n", module_num, mlx5_pme_status[module_status - 1], mlx5_pme_error[error_type]); + + return NOTIFY_OK; } void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) @@ -176,80 +293,6 @@ void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) *stats = dev->priv.events->pme_stats; } -/* Event handler for the low level mlx5_core driver. - * This handler will process/filter _some_ events and sometimes dispatch - * the equivalent mlx5_dev_event to the HCA interfaces (mlx5_ib and mlx5e) - * - * Other Major feature specific events such as - * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with - * separate notifiers callbacks, specifically by those mlx5 components. 
- */ -static int events_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - bool dev_event_dispatch = false; - enum mlx5_dev_event dev_event; - unsigned long dev_event_data; - - struct mlx5_eqe *eqe = data; - struct mlx5_events *events; - struct mlx5_core_dev *dev; - u8 port; - - events = mlx5_nb_cof(nb, struct mlx5_events, nb); - dev = events->dev; - - mlx5_core_dbg(dev, "Async eqe type %s, subtype (%d)\n", - eqe_type_str(eqe->type), eqe->sub_type); - switch (eqe->type) { - case MLX5_EVENT_TYPE_PORT_CHANGE: - port = (eqe->data.port.port >> 4) & 0xf; - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - dev_event = port_subtype2dev(eqe->sub_type); - dev_event_data = (unsigned long)port; - dev_event_dispatch = true; - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - break; - case MLX5_EVENT_TYPE_GENERAL_EVENT: - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT; - dev_event_data = 0; - dev_event_dispatch = true; - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } - break; - - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - port_module_event(events, eqe); - break; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - temp_warning_event(dev, eqe); - break; - default: - return NOTIFY_DONE; - } - - if (dev->event && dev_event_dispatch) - dev->event(dev, dev_event, dev_event_data); - - return NOTIFY_OK; -} - int mlx5_events_init(struct mlx5_core_dev *dev) { struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); @@ -270,14 +313,20 @@ void mlx5_events_cleanup(struct mlx5_core_dev *dev) void mlx5_events_start(struct mlx5_core_dev *dev) { struct mlx5_events *events = dev->priv.events; + int i; - MLX5_NB_INIT(&events->nb, events_notifier, NOTIFY_ANY); - mlx5_eq_notifier_register(dev, &events->nb); + for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { + events->notifiers[i].nb = events_nbs_ref[i]; + events->notifiers[i].ctx = events; + mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); + } } void mlx5_events_stop(struct mlx5_core_dev *dev) { struct mlx5_events *events = dev->priv.events; + int i; - mlx5_eq_notifier_unregister(dev, &events->nb); + for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) + mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); } -- cgit v1.2.3 From 20902be46c4da59b1891d238801146134e0e06b5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:38:56 -0800 Subject: net/mlx5: Driver events notifier API Use atomic notifier chain to fire events to mlx5 core driver consumers (mlx5e/mlx5_ib) and provide mlx5 register/unregister notifier API. This API will replace the current mlx5_interface->event callback and all the logic around it, especially the delayed events logic introduced by commit 97834eba7c19 ("net/mlx5: Delay events till ib registration ends") Which is not needed anymore with this new API where the mlx5 interface can dynamically register/unregister its notifier. 
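To make the shape of this API concrete, here is a minimal sketch of a consumer, built only from the mlx5_notifier_register()/mlx5_notifier_unregister() signatures added in this patch; the my_* names are hypothetical, and since the chain is an atomic notifier chain the callback must not sleep.

	#include <linux/notifier.h>
	#include <linux/mlx5/driver.h>

	/* hypothetical consumer callback: 'event' is the event number,
	 * 'data' is the event payload; runs in atomic context
	 */
	static int my_mlx5_event_cb(struct notifier_block *nb,
				    unsigned long event, void *data)
	{
		return NOTIFY_OK;
	}

	static struct notifier_block my_nb = {
		.notifier_call = my_mlx5_event_cb,
	};

	/* register/unregister at exactly the points the consumer is
	 * ready to handle (or must stop handling) events
	 */
	static int my_start(struct mlx5_core_dev *mdev)
	{
		return mlx5_notifier_register(mdev, &my_nb);
	}

	static void my_stop(struct mlx5_core_dev *mdev)
	{
		mlx5_notifier_unregister(mdev, &my_nb);
	}

This dynamic register/unregister pair is what makes the delayed-events workaround removable later in the series: the consumer, not the core, decides when delivery begins.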
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 25 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 1 + include/linux/mlx5/driver.h | 4 ++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3ad004af37d7..560cc14c55f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -35,7 +35,8 @@ static struct mlx5_nb events_nbs_ref[] = { struct mlx5_events { struct mlx5_core_dev *dev; struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; - + /* driver notifier chain */ + struct atomic_notifier_head nh; /* port module events stats */ struct mlx5_pme_stats pme_stats; }; @@ -300,6 +301,7 @@ int mlx5_events_init(struct mlx5_core_dev *dev) if (!events) return -ENOMEM; + ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); events->dev = dev; dev->priv.events = events; return 0; @@ -330,3 +332,24 @@ void mlx5_events_stop(struct mlx5_core_dev *dev) for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); } + +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->nh, event, data); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 23317e328b0b..4d78a459676e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -73,5 +73,6 @@ struct mlx5_pme_stats { }; void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ba64ecf72478..b96929d0cc9c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1062,6 +1063,9 @@ struct mlx5_interface { void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); + int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 7a1795553074f90d59b3250b4a9b8aaf6ca2831e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:38:57 -0800 Subject: net/mlx5: Allow port change event to be forwarded to driver notifiers chain The idea is to allow mlx5 core interfaces (mlx5e/mlx5_ib) to be able to receive some allowed FW events as is via the new notifier API. 
In this patch we allow forwarding the port change event to mlx5 core interfaces (mlx5e/mlx5_ib) as it was received from FW. Once mlx5e and mlx5_ib start using this event we can safely remove the redundant software version of it and its translation logic. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 560cc14c55f7..adab66eb726c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -24,12 +24,18 @@ static int general_event(struct notifier_block *, unsigned long, void *); static int temp_warn(struct notifier_block *, unsigned long, void *); static int port_module(struct notifier_block *, unsigned long, void *); +/* handler which forwards the event to events->nh, driver notifiers */ +static int forward_event(struct notifier_block *, unsigned long, void *); + static struct mlx5_nb events_nbs_ref[] = { {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, {.nb.notifier_call = port_change, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, {.nb.notifier_call = general_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + + /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, }; struct mlx5_events { @@ -294,6 +300,16 @@ void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) *stats = dev->priv.events->pme_stats; } +/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ +static int forward_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + + atomic_notifier_call_chain(&events->nh, event, data); + return NOTIFY_OK; +} + int mlx5_events_init(struct mlx5_core_dev *dev) { struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); -- cgit v1.2.3 From 7cffaddd39b438fab49863ea4602d6294a9d657f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:38:58 -0800 Subject: net/mlx5e: Use the new mlx5 core notifier API Remove the deprecated mlx5_interface->event mlx5e callback and use the new mlx5 notifier API to subscribe for mlx5 events, handling the port change event as received from FW rather than the mlx5 core processed software version of it.
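On the consumer side, a forwarded port change event arrives as the raw EQE, so the receiver filters by event type and decodes the same fields the core port_change() handler decodes. A sketch with a hypothetical callback and a hypothetical my_update_carrier() helper; the mlx5e diff below implements this exact pattern:

	static int my_port_change_cb(struct notifier_block *nb,
				     unsigned long event, void *data)
	{
		struct mlx5_eqe *eqe = data;

		/* this chain carries every forwarded event type */
		if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
			return NOTIFY_DONE;

		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: {
			/* same decoding as the core handler */
			u8 port = (eqe->data.port.port >> 4) & 0xf;

			my_update_carrier(port); /* hypothetical */
			break;
		}
		default:
			return NOTIFY_DONE;
		}

		return NOTIFY_OK;
	}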
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index aea74856c702..13d8a74d3db5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -631,7 +631,6 @@ struct mlx5e_channel_stats { } ____cacheline_aligned_in_smp; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, }; @@ -690,6 +689,8 @@ struct mlx5e_priv { struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; + struct notifier_block events_nb; + #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0d495a6b3949..56bc41b1c31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -294,33 +294,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->update_stats_work); } -static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, - enum mlx5_dev_event event, unsigned long param) +static int async_event(struct notifier_block *nb, unsigned long event, void *data) { - struct mlx5e_priv *priv = vpriv; + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) - return; + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; - switch (event) { - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: queue_work(priv->wq, &priv->update_carrier_work); break; default: - break; + return NOTIFY_DONE; } + + return NOTIFY_OK; } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); + priv->events_nb.notifier_call = async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - mlx5_eq_synchronize_async_irq(priv->mdev); + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -5170,7 +5172,6 @@ static struct mlx5_interface mlx5e_interface = { .remove = mlx5e_remove, .attach = mlx5e_attach, .detach = mlx5e_detach, - .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, }; -- cgit v1.2.3 From 58d180b34e98698fec178a469b700f1bb5a32c1f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:38:59 -0800 Subject: net/mlx5: Forward all mlx5 events to mlx5 notifiers chain This is to allow seamless migration to the new notifier chain API, and to eventually deprecate the mlx5_interface->event callback.
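The driver.h hunk below carries the key invariant: FW EQE event types occupy 0-127, so starting the software-defined events at 128 lets one chain deliver both kinds without ambiguity. A sketch of the range check a consumer can rely on; the helper and callback names are hypothetical:

	/* hypothetical helper: FW EQE types are 0 - 127, software
	 * events (MLX5_DEV_EVENT_SYS_ERROR and up) start at 128
	 */
	static bool my_event_is_fw_eqe(unsigned long event)
	{
		return event < MLX5_DEV_EVENT_SYS_ERROR;
	}

	static int my_cb(struct notifier_block *nb, unsigned long event,
			 void *data)
	{
		if (my_event_is_fw_eqe(event)) {
			struct mlx5_eqe *eqe = data; /* raw EQE payload */

			return my_handle_eqe(eqe);   /* hypothetical */
		}

		/* software event: 'data' carries the legacy 'param'
		 * value cast to a pointer
		 */
		return NOTIFY_DONE;
	}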
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 3 +++ include/linux/mlx5/driver.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 7eedbea38a78..d63ba8813829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -32,6 +32,7 @@ #include #include "mlx5_core.h" +#include "lib/mlx5.h" static LIST_HEAD(intf_list); static LIST_HEAD(mlx5_dev_list); @@ -425,6 +426,8 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx5_notifier_call_chain(dev->priv.events, event, (void *)param); } void mlx5_dev_list_lock(void) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b96929d0cc9c..14ca74707275 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -195,7 +195,7 @@ struct mlx5_rsc_debug { }; enum mlx5_dev_event { - MLX5_DEV_EVENT_SYS_ERROR, + MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ MLX5_DEV_EVENT_PORT_UP, MLX5_DEV_EVENT_PORT_DOWN, MLX5_DEV_EVENT_PORT_INITIALIZED, -- cgit v1.2.3 From df097a278c7592873d88571a8c78f987f6ae511b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:00 -0800 Subject: IB/mlx5: Use the new mlx5 core notifier API Remove the deprecated mlx5_interface->event mlx5_ib callback and use the new mlx5 notifier API to subscribe for mlx5 events. For native mlx5_ib device profiles (pf_profile/nic_rep_profile), register the notifier callback mlx5_ib_handle_event which treats the notifier context as mlx5_ib_dev. For vport representors, don't register any notifier; same as before, they didn't receive any mlx5 events. For a slave port (mlx5_ib_multiport_info), register a different notifier callback, mlx5_ib_event_slave_port, which knows that the event is coming for an mlx5_ib_multiport_info and prepares the event job accordingly. Before this, in the event handler work we had to ask mlx5_core if this is a slave port (mlx5_core_is_mp_slave(work->dev)); now that is not needed anymore. mlx5_ib_multiport_info notifier registration is done on mlx5_ib_bind_slave_port and de-registration is done on mlx5_ib_unbind_slave_port.
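The two notifier callbacks added below differ only in how they recover their context: each notifier_block is embedded in its owning structure, so container_of() gets the owner back without any global lookup. A reduced sketch of the pattern, with struct and field names as in the patch and the bodies elided:

	static int mlx5_ib_event(struct notifier_block *nb,
				 unsigned long event, void *param)
	{
		struct mlx5_ib_dev *ibdev =
			container_of(nb, struct mlx5_ib_dev, mdev_events);

		/* queue work with work->dev = ibdev, work->is_slave = false */
		return NOTIFY_OK;
	}

	static int mlx5_ib_event_slave_port(struct notifier_block *nb,
					    unsigned long event, void *param)
	{
		struct mlx5_ib_multiport_info *mpi =
			container_of(nb, struct mlx5_ib_multiport_info,
				     mdev_events);

		/* queue work with work->mpi = mpi, work->is_slave = true */
		return NOTIFY_OK;
	}

Embedding the block per consumer is the design choice that lets one device feed any number of notifiers, each with its own private state.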
Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 77 +++++++++++++++++++++++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++ 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fcf4a0328a90..549c766cf309 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -82,10 +82,13 @@ static char mlx5_version[] = struct mlx5_ib_event_work { struct work_struct work; - struct mlx5_core_dev *dev; - void *context; + union { + struct mlx5_ib_dev *dev; + struct mlx5_ib_multiport_info *mpi; + }; + bool is_slave; enum mlx5_dev_event event; - unsigned long param; + void *param; }; enum { @@ -4240,14 +4243,14 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = (u8)work->param; + u8 port = (u8)(unsigned long)work->param; - if (mlx5_core_is_mp_slave(work->dev)) { - ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); + if (work->is_slave) { + ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); if (!ibdev) goto out; } else { - ibdev = work->context; + ibdev = work->dev; } switch (work->event) { @@ -4256,7 +4259,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work) mlx5_ib_handle_internal_error(ibdev); fatal = true; break; - case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: @@ -4311,22 +4313,43 @@ out: kfree(work); } -static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param) +static int mlx5_ib_event(struct notifier_block *nb, + unsigned long event, void *param) { struct mlx5_ib_event_work *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return; + return NOTIFY_DONE; INIT_WORK(&work->work, mlx5_ib_handle_event); - work->dev = dev; + work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events); + work->is_slave = false; work->param = param; - work->context = context; work->event = event; queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; +} + +static int mlx5_ib_event_slave_port(struct notifier_block *nb, + unsigned long event, void *param) +{ + struct mlx5_ib_event_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + + INIT_WORK(&work->work, mlx5_ib_handle_event); + work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events); + work->is_slave = true; + work->param = param; + work->event = event; + queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -5357,6 +5380,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, spin_unlock(&port->mp.mpi_lock); return; } + + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; + mpi->ibdev = NULL; spin_unlock(&port->mp.mpi_lock); @@ -5412,6 +5440,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, ibdev->port[port_num].mp.mpi = mpi; mpi->ibdev = ibdev; + mpi->mdev_events.notifier_call = NULL; spin_unlock(&ibdev->port[port_num].mp.mpi_lock); err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); @@ -5429,6 +5458,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, goto unbind; } + mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; + mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); + err = mlx5_ib_init_cong_debugfs(ibdev, 
port_num); if (err) goto unbind; @@ -6163,6 +6195,18 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_unregister_vport_reps(dev); } +static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) +{ + dev->mdev_events.notifier_call = mlx5_ib_event; + mlx5_notifier_register(dev->mdev, &dev->mdev_events); + return 0; +} + +static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -6228,6 +6272,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, mlx5_ib_stage_odp_init, mlx5_ib_stage_odp_cleanup), @@ -6279,6 +6326,9 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6399,7 +6449,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, - .event = mlx5_ib_event, .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 27999fd32356..7326fb8710e0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -775,6 +775,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, MLX5_IB_STAGE_DEVICE_RESOURCES, + MLX5_IB_STAGE_DEVICE_NOTIFIER, MLX5_IB_STAGE_ODP, MLX5_IB_STAGE_COUNTERS, MLX5_IB_STAGE_CONG_DEBUGFS, @@ -806,6 +807,7 @@ struct mlx5_ib_multiport_info { struct list_head list; struct mlx5_ib_dev *ibdev; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct completion unref_comp; u64 sys_image_guid; u32 mdev_refcnt; @@ -893,6 +895,7 @@ struct mlx5_ib_dev { struct ib_device ib_dev; const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask -- cgit v1.2.3 From 02039fb659b366011f55b15890136754f3d82e2d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:01 -0800 Subject: net/mlx5: Remove unused events callback and logic The mlx5_interface->event callback is not used by mlx5e/mlx5_ib anymore. We totally remove the delayed events logic work around, since with the dynamic notifier registration API it is not needed anymore, mlx5_ib can register its notifier and start receiving events exactly at the moment it is ready to handle them. 
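The accumulation machinery deleted below existed only to paper over a race in the static callback model: events could fire between ib_register_device() and the interface landing on the events list, so they had to be buffered and replayed. With dynamic registration the consumer closes that window itself. A sketch of the resulting lifecycle, with hypothetical my_* names:

	static void *my_add(struct mlx5_core_dev *mdev)
	{
		struct my_ctx *ctx = my_ctx_create(mdev); /* hypothetical */

		/* everything the callback touches is valid from here
		 * on, so no event needs to be buffered and replayed
		 */
		ctx->nb.notifier_call = my_event_cb;
		mlx5_notifier_register(mdev, &ctx->nb);
		return ctx;
	}

	static void my_remove(struct mlx5_core_dev *mdev, void *context)
	{
		struct my_ctx *ctx = context;

		mlx5_notifier_unregister(mdev, &ctx->nb); /* stop events first */
		my_ctx_destroy(ctx);                      /* hypothetical */
	}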
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 109 +-------------------- drivers/net/ethernet/mellanox/mlx5/core/events.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 -- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3 - include/linux/mlx5/driver.h | 10 +- 6 files changed, 11 insertions(+), 132 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index d63ba8813829..d2ed14bc37c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -32,7 +32,6 @@ #include #include "mlx5_core.h" -#include "lib/mlx5.h" static LIST_HEAD(intf_list); static LIST_HEAD(mlx5_dev_list); @@ -46,75 +45,11 @@ struct mlx5_device_context { unsigned long state; }; -struct mlx5_delayed_event { - struct list_head list; - struct mlx5_core_dev *dev; - enum mlx5_dev_event event; - unsigned long param; -}; - enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; -static void add_delayed_event(struct mlx5_priv *priv, - struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_delayed_event *delayed_event; - - delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); - if (!delayed_event) { - mlx5_core_err(dev, "event %d is missed\n", event); - return; - } - - mlx5_core_dbg(dev, "Accumulating event %d\n", event); - delayed_event->dev = dev; - delayed_event->event = event; - delayed_event->param = param; - list_add_tail(&delayed_event->list, &priv->waiting_events_list); -} - -static void delayed_event_release(struct mlx5_device_context *dev_ctx, - struct mlx5_priv *priv) -{ - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - struct mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - struct list_head temp; - - INIT_LIST_HEAD(&temp); - - spin_lock_irq(&priv->ctx_lock); - - priv->is_accum_events = false; - list_splice_init(&priv->waiting_events_list, &temp); - if (!dev_ctx->context) - goto out; - list_for_each_entry_safe(de, n, &temp, list) - dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); - -out: - spin_unlock_irq(&priv->ctx_lock); - - list_for_each_entry_safe(de, n, &temp, list) { - list_del(&de->list); - kfree(de); - } -} - -/* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. 
- */ -static void delayed_event_start(struct mlx5_priv *priv) -{ - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = true; - spin_unlock_irq(&priv->ctx_lock); -} void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { @@ -130,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) dev_ctx->intf = intf; - delayed_event_start(priv); - dev_ctx->context = intf->add(dev); if (dev_ctx->context) { set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -143,8 +76,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_unlock_irq(&priv->ctx_lock); } - delayed_event_release(dev_ctx, priv); - if (!dev_ctx->context) kfree(dev_ctx); } @@ -188,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; - delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - goto out; + return; if (intf->attach(dev, dev_ctx->context)) - goto out; - + return; set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - goto out; + return; dev_ctx->context = intf->add(dev); if (!dev_ctx->context) - goto out; - + return; set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } - -out: - delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -403,32 +328,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return res; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - if (priv->is_accum_events) - add_delayed_event(priv, dev, event, param); - - /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is - * still in priv->ctx_list. In this case, only notify the dev_ctx if its - * ADDED or ATTACHED bit are set. 
- */ - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event && - (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || - test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - mlx5_notifier_call_chain(dev->priv.events, event, (void *)param); -} void mlx5_dev_list_lock(void) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index adab66eb726c..ab66f5d65a04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -178,8 +178,8 @@ static int port_change(struct notifier_block *nb, port, eqe->sub_type); } - if (dev->event && dev_event_dispatch) - dev->event(dev, dev_event, dev_event_data); + if (dev_event_dispatch) + mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); return NOTIFY_OK; } @@ -207,8 +207,8 @@ static int general_event(struct notifier_block *nb, unsigned long type, void *da eqe->sub_type); } - if (dev->event && dev_event_dispatch) - dev->event(dev, dev_event, dev_event_data); + if (dev_event_dispatch) + mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 4e42bd290959..196c07383082 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -39,6 +39,7 @@ #include #include "mlx5_core.h" #include "lib/eq.h" +#include "lib/mlx5.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -105,7 +106,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_cmd_trigger_completions(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); mlx5_core_err(dev, "end\n"); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e56278ead4eb..4bc27a073dc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1125,12 +1125,6 @@ out: return err; } -struct mlx5_core_event_handler { - void (*event)(struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - void *data); -}; - static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -1164,7 +1158,6 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dev->pdev = pdev; - dev->event = mlx5_core_event; dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1172,9 +1165,6 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); - INIT_LIST_HEAD(&priv->waiting_events_list); - priv->is_accum_events = false; - mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c70bd94e18d6..fd3141a4b3f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -102,9 +102,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int 
mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); - -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 14ca74707275..d3ffc64f9a75 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -588,10 +588,7 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; - - struct list_head waiting_events_list; - bool is_accum_events; - struct mlx5_events *events; + struct mlx5_events *events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -696,9 +693,6 @@ struct mlx5_core_dev { /* sync interface state */ struct mutex intf_state_mutex; unsigned long intf_state; - void (*event) (struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param); struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; @@ -1053,8 +1047,6 @@ struct mlx5_interface { void (*remove)(struct mlx5_core_dev *dev, void *context); int (*attach)(struct mlx5_core_dev *dev, void *context); void (*detach)(struct mlx5_core_dev *dev, void *context); - void (*event)(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); int protocol; struct list_head list; -- cgit v1.2.3 From 134e9349ecf0e23c954e205a31f5b665ce25e5f6 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:02 -0800 Subject: IB/mlx5: Handle raw port change event rather than the software version Use the FW version of the port change event as forwarded via new mlx5 notifiers API. After this patch, processed software version of the port change event will become deprecated and will be totally removed in downstream patches. Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 86 +++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 549c766cf309..a0668b923f78 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -87,7 +87,7 @@ struct mlx5_ib_event_work { struct mlx5_ib_multiport_info *mpi; }; bool is_slave; - enum mlx5_dev_event event; + unsigned int event; void *param; }; @@ -4236,6 +4236,51 @@ static void delay_drop_handler(struct work_struct *work) mutex_unlock(&delay_drop->lock); } +static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + u8 port = (eqe->data.port.port >> 4) & 0xf; + + ibev->element.port_num = port; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ? 
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_LID: + ibev->event = IB_EVENT_LID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + ibev->event = IB_EVENT_PKEY_CHANGE; + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); + break; + + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + ibev->event = IB_EVENT_GID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + ibev->event = IB_EVENT_CLIENT_REREGISTER; + break; + default: + return -EINVAL; + } + + return 0; +} + static void mlx5_ib_handle_event(struct work_struct *_work) { struct mlx5_ib_event_work *work = @@ -4243,7 +4288,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = (u8)(unsigned long)work->param; if (work->is_slave) { ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); @@ -4257,37 +4301,12 @@ static void mlx5_ib_handle_event(struct work_struct *_work) case MLX5_DEV_EVENT_SYS_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + ibev.element.port_num = (u8)(unsigned long)work->param; fatal = true; break; - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* In RoCE, port up/down events are handled in - * mlx5_netdev_event(). - */ - if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == - IB_LINK_LAYER_ETHERNET) + case MLX5_EVENT_TYPE_PORT_CHANGE: + if (handle_port_change(ibdev, work->param, &ibev)) goto out; - - ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; - break; - - case MLX5_DEV_EVENT_LID_CHANGE: - ibev.event = IB_EVENT_LID_CHANGE; - break; - - case MLX5_DEV_EVENT_PKEY_CHANGE: - ibev.event = IB_EVENT_PKEY_CHANGE; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); - break; - - case MLX5_DEV_EVENT_GUID_CHANGE: - ibev.event = IB_EVENT_GID_CHANGE; - break; - - case MLX5_DEV_EVENT_CLIENT_REREG: - ibev.event = IB_EVENT_CLIENT_REREGISTER; break; case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: schedule_work(&ibdev->delay_drop.delay_drop_work); @@ -4296,11 +4315,10 @@ static void mlx5_ib_handle_event(struct work_struct *_work) goto out; } - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = port; + ibev.device = &ibdev->ib_dev; - if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { - mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num); goto out; } -- cgit v1.2.3 From cb6191bf25f755f47593fc40b8fd39d3eedde884 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:03 -0800 Subject: net/mlx5: Allow forwarding event type general event as is FW general event is used by mlx5_ib for RQ delay drop timeout event handling, in this patch we allow to forward FW general event type to mlx5 notifiers chain so mlx5_ib can handle it and to deprecate the software version of it. 
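As with the port change event, the consumer sees the raw EQE and dispatches on sub_type; the mlx5_ib patch that follows does exactly this for the delay drop timeout. A minimal sketch, with a hypothetical callback and a hypothetical work-scheduling helper:

	static int my_general_event_cb(struct notifier_block *nb,
				       unsigned long event, void *data)
	{
		struct mlx5_eqe *eqe = data;

		if (event != MLX5_EVENT_TYPE_GENERAL_EVENT)
			return NOTIFY_DONE;

		if (eqe->sub_type == MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT)
			my_schedule_delay_drop_work(); /* hypothetical */

		return NOTIFY_OK;
	}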
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index ab66f5d65a04..735a9b038a73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -36,6 +36,7 @@ static struct mlx5_nb events_nbs_ref[] = { /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, }; struct mlx5_events { -- cgit v1.2.3 From 09e574fa76d6a47e283a7eb18ec1b0cb34174532 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:04 -0800 Subject: IB/mlx5: Handle raw delay drop general event Handle FW general event rq delay drop as it was received from FW via mlx5 notifiers API, instead of handling the processed software version of that event. After this patch we can safely remove all software processed FW events types and definitions. Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a0668b923f78..7e6af18e7d82 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4236,6 +4236,18 @@ static void delay_drop_handler(struct work_struct *work) mutex_unlock(&delay_drop->lock); } +static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + break; + default: /* do nothing */ + return; + } +} + static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, struct ib_event *ibev) { @@ -4308,9 +4320,9 @@ static void mlx5_ib_handle_event(struct work_struct *_work) if (handle_port_change(ibdev, work->param, &ibev)) goto out; break; - case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: - schedule_work(&ibdev->delay_drop.delay_drop_work); - goto out; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + handle_general_event(ibdev, work->param, &ibev); + /* fall through */ default: goto out; } -- cgit v1.2.3 From b8267cd765b333673e05696b517d38a1a7eb5b2e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:05 -0800 Subject: net/mlx5: Remove all deprecated software versions of FW events Before the new mlx5 event notification infrastructure and API, mlx5_core used to process all events before forwarding them to mlx5 interfaces (mlx5e/mlx5_ib) and used to translate the event type enum to a software defined enum, this is not needed anymore since it is ok for mlx5e and mlx5_ib to receive FW events as is, at least the few ones mlx5 core allows. mlx5e and mlx5_ib already moved to use the new API and they only handle FW events types, it is now safe to remove all equivalent software defined events and the logic around them. 
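After this removal, the core's event table settles into the split the series has been working toward: a handful of events mlx5_core must process itself, and plain forwards for everything the interfaces consume. Condensed from the diffs in this series, the surviving entries at this point look like:

	static struct mlx5_nb events_nbs_ref[] = {
		/* events processed by mlx5_core itself */
		{.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
		{.nb.notifier_call = temp_warn,    .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
		{.nb.notifier_call = port_module,  .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },

		/* events forwarded untouched to mlx5e/mlx5_ib */
		{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
		{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
	};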
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 92 +----------------------- include/linux/mlx5/driver.h | 9 --- 2 files changed, 1 insertion(+), 100 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 735a9b038a73..3708b42c1d6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -19,8 +19,6 @@ struct mlx5_event_nb { * separate notifiers callbacks, specifically by those mlx5 components. */ static int any_notifier(struct notifier_block *, unsigned long, void *); -static int port_change(struct notifier_block *, unsigned long, void *); -static int general_event(struct notifier_block *, unsigned long, void *); static int temp_warn(struct notifier_block *, unsigned long, void *); static int port_module(struct notifier_block *, unsigned long, void *); @@ -28,9 +26,8 @@ static int port_module(struct notifier_block *, unsigned long, void *); static int forward_event(struct notifier_block *, unsigned long, void *); static struct mlx5_nb events_nbs_ref[] = { + /* Events to be proccessed by mlx5_core */ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, - {.nb.notifier_call = port_change, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, - {.nb.notifier_call = general_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, @@ -127,93 +124,6 @@ static int any_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static enum mlx5_dev_event port_subtype2dev(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -/* type == MLX5_EVENT_TYPE_PORT_CHANGE */ -static int port_change(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); - struct mlx5_events *events = event_nb->ctx; - struct mlx5_core_dev *dev = events->dev; - - bool dev_event_dispatch = false; - enum mlx5_dev_event dev_event; - unsigned long dev_event_data; - struct mlx5_eqe *eqe = data; - u8 port = (eqe->data.port.port >> 4) & 0xf; - - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - dev_event = port_subtype2dev(eqe->sub_type); - dev_event_data = (unsigned long)port; - dev_event_dispatch = true; - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - - if (dev_event_dispatch) - mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); - - return NOTIFY_OK; -} - -/* type == MLX5_EVENT_TYPE_GENERAL_EVENT */ -static 
int general_event(struct notifier_block *nb, unsigned long type, void *data) -{ - struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); - struct mlx5_events *events = event_nb->ctx; - struct mlx5_core_dev *dev = events->dev; - - bool dev_event_dispatch = false; - enum mlx5_dev_event dev_event; - unsigned long dev_event_data; - struct mlx5_eqe *eqe = data; - - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT; - dev_event_data = 0; - dev_event_dispatch = true; - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } - - if (dev_event_dispatch) - mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); - - return NOTIFY_OK; -} - /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d3ffc64f9a75..a77bedb8a556 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -196,15 +196,6 @@ struct mlx5_rsc_debug { enum mlx5_dev_event { MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ - MLX5_DEV_EVENT_PORT_UP, - MLX5_DEV_EVENT_PORT_DOWN, - MLX5_DEV_EVENT_PORT_INITIALIZED, - MLX5_DEV_EVENT_LID_CHANGE, - MLX5_DEV_EVENT_PKEY_CHANGE, - MLX5_DEV_EVENT_GUID_CHANGE, - MLX5_DEV_EVENT_CLIENT_REREG, - MLX5_DEV_EVENT_PPS, - MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, }; enum mlx5_port_status { -- cgit v1.2.3 From 451be51c0b474f790e9833cd575fd9a6fbd679df Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:06 -0800 Subject: net/mlx5: Forward QP/WorkQueues resource events Allow forwarding QP and WQ events to mlx5_core interfaces, e.g. mlx5_ib. Use mlx5_notifier_register/unregister in qp.c in order to allow seamless transition of qp.c to infiniband subsystem.
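[Editor's note: a minimal sketch of the consumer side of this change. my_rsc_event/my_rsc_nb are hypothetical names for illustration only; the real in-tree consumer is rsc_event_notifier() in the qp.c hunk below. forward_event() passes the EQE type as the notifier action and the raw EQE as data:

	/* Illustrative subscriber to the forwarded QP/WQ events (sketch only). */
	static int my_rsc_event(struct notifier_block *nb,
				unsigned long type, void *data)
	{
		struct mlx5_eqe *eqe = data;	/* forward_event() hands over the raw EQE */

		if (type != MLX5_EVENT_TYPE_WQ_CATAS_ERROR)
			return NOTIFY_DONE;	/* not interesting to this subscriber */

		/* ... handle eqe->data.qp_srq here ... */
		return NOTIFY_OK;
	}

	static struct notifier_block my_rsc_nb = {
		.notifier_call = my_rsc_event,
	};

	/* On load:   mlx5_notifier_register(dev, &my_rsc_nb);
	 * On unload: mlx5_notifier_unregister(dev, &my_rsc_nb);
	 */
]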
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/qp.c | 8 ++++---- include/linux/mlx5/driver.h | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3708b42c1d6b..201c5f6091ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -34,6 +34,16 @@ static struct mlx5_nb events_nbs_ref[] = { /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, }; struct mlx5_events { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 28726c63101f..388f205a497f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -150,7 +150,7 @@ static int rsc_event_notifier(struct notifier_block *nb, return NOTIFY_DONE; } - table = mlx5_nb_cof(nb, struct mlx5_qp_table, nb); + table = container_of(nb, struct mlx5_qp_table, nb); priv = container_of(table, struct mlx5_priv, qp_table); dev = container_of(priv, struct mlx5_core_dev, priv); @@ -523,15 +523,15 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); - MLX5_NB_INIT(&table->nb, rsc_event_notifier, NOTIFY_ANY); - mlx5_eq_notifier_register(dev, &table->nb); + table->nb.notifier_call = rsc_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) { struct mlx5_qp_table *table = &dev->priv.qp_table; - mlx5_eq_notifier_unregister(dev, &table->nb); + mlx5_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a77bedb8a556..4f078b7f6620 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -456,7 +456,7 @@ struct mlx5_core_health { }; struct mlx5_qp_table { - struct mlx5_nb nb; + struct notifier_block nb; /* protect radix tree */ -- cgit v1.2.3 From 4e2df04ad25ab8e627878817e56d6a27645ca4a8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:07 -0800 Subject: net/mlx5: Forward SRQ resource events Allow forwarding of SRQ events to mlx5_core interfaces, e.g. mlx5_ib. Use mlx5_notifier_register/unregister in srq.c in order to allow seamless transition of srq.c to infiniband subsystem. 
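[Editor's note: the consolidated SRQ handler in the diff below follows the same filter-first pattern as the QP/WQ one. Condensed here for reference; every line is taken from srq_event_notifier() in the srq.c hunk that follows:

	/* Condensed from srq_event_notifier() below. */
	if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
	    type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
		return NOTIFY_DONE;			/* not an SRQ event */

	table = container_of(nb, struct mlx5_srq_table, nb);
	srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; /* low 24 bits */
]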
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 3 ++ drivers/net/ethernet/mellanox/mlx5/core/srq.c | 38 +++++++----------------- include/linux/mlx5/driver.h | 3 +- 3 files changed, 14 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 201c5f6091ea..9e6e216faac3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -44,6 +44,9 @@ static struct mlx5_nb events_nbs_ref[] = { {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, }; struct mlx5_events { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 0563866c13f2..79c5f0d57956 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -40,15 +40,21 @@ #include "mlx5_core.h" #include "lib/eq.h" -static int srq_event_notifier(struct mlx5_srq_table *table, +static int srq_event_notifier(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_srq_table *table; struct mlx5_core_dev *dev; struct mlx5_core_srq *srq; struct mlx5_priv *priv; struct mlx5_eqe *eqe; u32 srqn; + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); priv = container_of(table, struct mlx5_priv, srq_table); dev = container_of(priv, struct mlx5_core_dev, priv); @@ -77,26 +83,6 @@ static int srq_event_notifier(struct mlx5_srq_table *table, return NOTIFY_OK; } -static int catas_err_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_srq_table *table; - - table = mlx5_nb_cof(nb, struct mlx5_srq_table, catas_err_nb); - /* type == MLX5_EVENT_TYPE_SRQ_CATAS_ERROR */ - return srq_event_notifier(table, type, data); -} - -static int rq_limit_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_srq_table *table; - - table = mlx5_nb_cof(nb, struct mlx5_srq_table, rq_limit_nb); - /* type == MLX5_EVENT_TYPE_SRQ_RQ_LIMIT */ - return srq_event_notifier(table, type, data); -} - static int get_pas_size(struct mlx5_srq_attr *in) { u32 log_page_size = in->log_page_size + 12; @@ -743,17 +729,13 @@ void mlx5_init_srq_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); - MLX5_NB_INIT(&table->catas_err_nb, catas_err_notifier, SRQ_CATAS_ERROR); - mlx5_eq_notifier_register(dev, &table->catas_err_nb); - - MLX5_NB_INIT(&table->rq_limit_nb, rq_limit_notifier, SRQ_RQ_LIMIT); - mlx5_eq_notifier_register(dev, &table->rq_limit_nb); + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) { struct mlx5_srq_table *table = &dev->priv.srq_table; - mlx5_eq_notifier_unregister(dev, &table->rq_limit_nb); - mlx5_eq_notifier_unregister(dev, &table->catas_err_nb); + mlx5_notifier_unregister(dev, &table->nb); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h 
index 4f078b7f6620..27a481b159ed 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -465,8 +465,7 @@ struct mlx5_qp_table { }; struct mlx5_srq_table { - struct mlx5_nb catas_err_nb; - struct mlx5_nb rq_limit_nb; + struct notifier_block nb; /* protect radix tree */ spinlock_t lock; -- cgit v1.2.3 From 93631211c9c07dd3e60ca8462281ff475d3b87a6 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:08 -0800 Subject: net/mlx5: Debug print for forwarded async events Print a debug message for every async FW event forwarded to mlx5 interfaces (mlx5e netdev and mlx5_ib rdma module). Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 9e6e216faac3..e92df7020a26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -229,7 +229,10 @@ static int forward_event(struct notifier_block *nb, unsigned long event, void *d { struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", + eqe_type_str(eqe->type), eqe->sub_type); atomic_notifier_call_chain(&events->nh, event, data); return NOTIFY_OK; } -- cgit v1.2.3 From 6cd0014ab90f6959fa1f8cc8b3f38d302457c919 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:33 +0200 Subject: net/mlx5: Align SRQ licenses and copyright information Ensure that both the RDMA and netdev parts of the SRQ implementation have the same copyright and license information, annotated by SPDX tags. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/srq.c | 31 ++------------------------- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 31 ++------------------------- include/linux/mlx5/srq.h | 31 ++------------------------- 3 files changed, 6 insertions(+), 87 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index d012e7dbcc38..28794780062e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 79c5f0d57956..10036aaa200a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ #include diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 1b1f3c20c6a3..77bc4264066d 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. */ #ifndef MLX5_SRQ_H -- cgit v1.2.3 From 5b5f0f16276021794038f12adc56df70cec42b4f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:34 +0200 Subject: net/mlx5: Remove dead transobj code Delete functions which are not called and not needed. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 66 ---------------------- include/linux/mlx5/transobj.h | 5 -- 2 files changed, 71 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a1ee9a8a769e..ab482124e901 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -301,72 +301,6 @@ int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, rmpn); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - - return err; -} - -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *xsrqn) -{ - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; - int err; - - MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); - - return err; -} - -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) -{ - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); - MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, in, op_mod, - MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 7f5ca2cd3a32..39ebb699875b 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -63,11 +63,6 @@ int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, int 
mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn); -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn); -- cgit v1.2.3 From 26d1164dff81096c38d955f00b84a0075563d8c7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:35 +0200 Subject: net/mlx5: Remove not-used lib/eq.h header file lib/eq.h is needed for EQ manipulation, which is not performed in the SRQ code. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 10036aaa200a..248f1c8dd5d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -11,7 +11,6 @@ #include #include #include "mlx5_core.h" -#include "lib/eq.h" -- cgit v1.2.3 From c23f88cb575fe4a0b49603d7f9e5c3782886269f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:36 +0200 Subject: net/mlx5: Remove references to local mlx5_core functions In preparation for moving SRQ functionality to RDMA, drop all references to mlx5_core logic and make SRQ depend on shared code only. Most of the time, we are only interested in knowing whether events are working or not, and that is possible with the previous commit ("net/mlx5: Debug print for forwarded async events").
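[Editor's note: one visible effect, condensed from the mlx5_core_destroy_srq() hunk in the diff below: the separate "not found" and "corruption" warnings collapse into a single silent check, since the chain-level debug print already gives event visibility:

	tmp = radix_tree_delete(&table->tree, srq->srqn);
	if (!tmp || tmp != srq)		/* missing or mismatched entry */
		return -EINVAL;
]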
Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 248f1c8dd5d3..690815234838 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -10,15 +10,12 @@ #include #include #include -#include "mlx5_core.h" static int srq_event_notifier(struct notifier_block *nb, unsigned long type, void *data) { struct mlx5_srq_table *table; - struct mlx5_core_dev *dev; struct mlx5_core_srq *srq; - struct mlx5_priv *priv; struct mlx5_eqe *eqe; u32 srqn; @@ -27,12 +24,9 @@ static int srq_event_notifier(struct notifier_block *nb, return NOTIFY_DONE; table = container_of(nb, struct mlx5_srq_table, nb); - priv = container_of(table, struct mlx5_priv, srq_table); - dev = container_of(priv, struct mlx5_core_dev, priv); eqe = data; srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "SRQ event (%d): srqn 0x%x\n", eqe->type, srqn); spin_lock(&table->lock); @@ -42,10 +36,8 @@ static int srq_event_notifier(struct notifier_block *nb, spin_unlock(&table->lock); - if (!srq) { - mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); + if (!srq) return NOTIFY_OK; - } srq->event(srq, eqe->type); @@ -617,10 +609,8 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, spin_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, srq->srqn, srq); spin_unlock_irq(&table->lock); - if (err) { - mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); + if (err) goto err_destroy_srq_split; - } return 0; @@ -640,14 +630,8 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) spin_lock_irq(&table->lock); tmp = radix_tree_delete(&table->tree, srq->srqn); spin_unlock_irq(&table->lock); - if (!tmp) { - mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); + if (!tmp || tmp != srq) return -EINVAL; - } - if (tmp != srq) { - mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); - return -EINVAL; - } err = destroy_srq_split(dev, srq); if (err) -- cgit v1.2.3 From f02d0d6e53ac2c8a75b6cc87dc86675a9351d84d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:37 +0200 Subject: net/mlx5: Move SRQ functions to RDMA part There is no need to keep SRQ, which is an RDMA object, in mlx5_core. In this patch, we partially move the execution code, while the next patches will move the table initialization/release logic too.
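[Editor's note: as a worked example of the PAS sizing that moves into srq_cmd.c below, with numbers chosen purely for illustration:

	/*
	 * get_pas_size() with illustrative inputs:
	 *   log_page_size = 0  -> page size  = 1 << (0 + 12)     = 4096 B
	 *   wqe_shift     = 2  -> WQE stride = 1 << (2 + 4)      = 64 B
	 *   log_size      = 10 -> ring size  = 1 << (10 + 4 + 2) = 64 KB
	 *   page_offset   = 0  -> no extra offset quanta
	 * => 64 KB / 4 KB = 16 pages, 16 * sizeof(u64) = 128 B of PAS entries.
	 */
]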
Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/Makefile | 4 +- drivers/infiniband/hw/mlx5/cq.c | 1 + drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/infiniband/hw/mlx5/srq.h | 46 ++ drivers/infiniband/hw/mlx5/srq_cmd.c | 666 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/srq.c | 634 -------------------- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 43 -- include/linux/mlx5/driver.h | 8 - include/linux/mlx5/srq.h | 31 - include/linux/mlx5/transobj.h | 6 - 10 files changed, 717 insertions(+), 724 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/srq.h create mode 100644 drivers/infiniband/hw/mlx5/srq_cmd.c diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b8e4b15e2674..33f5adb14e4e 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ + srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ + cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7d769b5538b4..c5d2824ada59 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,6 +35,7 @@ #include #include #include "mlx5_ib.h" +#include "srq.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 28794780062e..a86d9f153805 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -9,8 +9,8 @@ #include #include #include - #include "mlx5_ib.h" +#include "srq.h" /* not supported currently */ static int srq_signature; diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h new file mode 100644 index 000000000000..f23d5de12973 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. 
+ */ + +#ifndef MLX5_IB_SRQ_H +#define MLX5_IB_SRQ_H + +enum { + MLX5_SRQ_FLAG_ERR = (1 << 0), + MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), +}; + +struct mlx5_srq_attr { + u32 type; + u32 flags; + u32 log_size; + u32 wqe_shift; + u32 log_page_size; + u32 wqe_cnt; + u32 srqn; + u32 xrcd; + u32 page_offset; + u32 cqn; + u32 pd; + u32 lwm; + u32 user_index; + u64 db_record; + __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; + u16 uid; +}; + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in); +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out); +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +#endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c new file mode 100644 index 000000000000..4a64ad4c9b25 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -0,0 +1,666 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include "srq.h" + +static int get_pas_size(struct mlx5_srq_attr *in) +{ + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); + + return rq_num_pas * sizeof(u64); +} + +static void set_wq(void *wq, struct mlx5_srq_attr *in) +{ + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + 
in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); +} + +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} + +static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_srq_in, create_in, uid, in->uid); + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); + + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); + + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); + + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); +} + +static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); +} + +static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; + + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); + if (!srq_out) + return -ENOMEM; + + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; +} + +static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 
create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); + memcpy(pas, in->pas, pas_size); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *xrc_srqc; + int err; + + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(xrcsrq_out); + return err; +} + +static int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn) +{ + u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = { 0 }; + int err; + + MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rmpn = MLX5_GET(create_rmp_out, out, rmpn); + + return err; +} + +static int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; + + 
MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +static int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + + MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, in, rmpn, rmpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + void *create_in; + void *rmpc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + + err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + if (!err) + srq->uid = in->uid; + + kvfree(create_in); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + return err; +} + +static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 *rmp_out; + void *rmpc; + int err; + + rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); + if (!rmp_out) + return -ENOMEM; + + err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + if (err) + goto out; + + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(rmp_out); + return err; +} + +static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + 
set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + +static int create_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + if (!dev->issi) + return create_srq_cmd(dev, srq, in); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return create_xrc_srq_cmd(dev, srq, in); + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: + return create_rmp_cmd(dev, srq, in); + } +} + +static int destroy_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + if (!dev->issi) + return destroy_srq_cmd(dev, srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return destroy_xrc_srq_cmd(dev, srq); + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: + return destroy_rmp_cmd(dev, srq); + } +} + +int mlx5_core_create_srq(struct 
mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + int err; + struct mlx5_srq_table *table = &dev->priv.srq_table; + + switch (in->type) { + case IB_SRQT_XRC: + srq->common.res = MLX5_RES_XSRQ; + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: + srq->common.res = MLX5_RES_SRQ; + } + + err = create_srq_split(dev, srq, in); + if (err) + return err; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) + goto err_destroy_srq_split; + + return 0; + +err_destroy_srq_split: + destroy_srq_split(dev, srq); + + return err; +} + +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp || tmp != srq) + return -EINVAL; + + err = destroy_srq_split(dev, srq); + if (err) + return err; + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} + +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + if (!dev->issi) + return query_srq_cmd(dev, srq, out); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return query_xrc_srq_cmd(dev, srq, out); + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: + return query_rmp_cmd(dev, srq, out); + } +} + +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + if (!dev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return arm_xrc_srq_cmd(dev, srq, lwm); + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: + return arm_rmp_cmd(dev, srq, lwm); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 690815234838..0e80ddbe2510 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -4,12 +4,8 @@ */ #include -#include #include -#include #include -#include -#include static int srq_event_notifier(struct notifier_block *nb, unsigned long type, void *data) @@ -47,636 +43,6 @@ static int srq_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int get_pas_size(struct mlx5_srq_attr *in) -{ - u32 log_page_size = in->log_page_size + 12; - u32 log_srq_size = in->log_size; - u32 log_rq_stride = in->wqe_shift; - u32 page_offset = in->page_offset; - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); - - return rq_num_pas * sizeof(u64); -} - -static void set_wq(void *wq, struct mlx5_srq_attr *in) -{ - MLX5_SET(wq, wq, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); - MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); - MLX5_SET(wq, wq, log_wq_sz, in->log_size); - MLX5_SET(wq, wq, page_offset, in->page_offset); - MLX5_SET(wq, wq, lwm, in->lwm); - MLX5_SET(wq, wq, pd, in->pd); - MLX5_SET64(wq, wq, dbr_addr, in->db_record); -} - -static void set_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - 
MLX5_SET(srqc, srqc, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); - MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); - MLX5_SET(srqc, srqc, log_srq_size, in->log_size); - MLX5_SET(srqc, srqc, page_offset, in->page_offset); - MLX5_SET(srqc, srqc, lwm, in->lwm); - MLX5_SET(srqc, srqc, pd, in->pd); - MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); - MLX5_SET(srqc, srqc, xrcd, in->xrcd); - MLX5_SET(srqc, srqc, cqn, in->cqn); -} - -static void get_wq(void *wq, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(wq, wq, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); - in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; - in->log_size = MLX5_GET(wq, wq, log_wq_sz); - in->page_offset = MLX5_GET(wq, wq, page_offset); - in->lwm = MLX5_GET(wq, wq, lwm); - in->pd = MLX5_GET(wq, wq, pd); - in->db_record = MLX5_GET64(wq, wq, dbr_addr); -} - -static void get_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(srqc, srqc, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); - in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); - in->log_size = MLX5_GET(srqc, srqc, log_srq_size); - in->page_offset = MLX5_GET(srqc, srqc, page_offset); - in->lwm = MLX5_GET(srqc, srqc, lwm); - in->pd = MLX5_GET(srqc, srqc, pd); - in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); -} - -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - return srq; -} -EXPORT_SYMBOL(mlx5_core_get_srq); - -static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; - void *create_in; - void *srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_srq_in, create_in, uid, in->uid); - srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); - pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - - set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); - - MLX5_SET(create_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_SRQ); - - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, 
MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; - u32 *srq_out; - void *srqc; - int err; - - srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); - if (!srq_out) - return -ENOMEM; - - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); - if (err) - goto out; - - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); - get_srqc(srqc, out); - if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; -out: - kvfree(srq_out); - return err; -} - -static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; - void *create_in; - void *xrc_srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); - xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, - xrc_srq_context_entry); - pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - - set_srqc(xrc_srqc, in); - MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); - MLX5_SET(create_xrc_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - if (err) - goto out; - - srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); - srq->uid = in->uid; -out: - kvfree(create_in); - return err; -} - -static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, u16 lwm) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; - u32 *xrcsrq_out; - void *xrc_srqc; - int err; - - xrcsrq_out = 
kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); - if (!xrcsrq_out) - return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - - err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); - if (err) - goto out; - - xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, - xrc_srq_context_entry); - get_srqc(xrc_srqc, out); - if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(xrcsrq_out); - return err; -} - -static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - void *create_in; - void *rmpc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - MLX5_SET(create_rmp_in, create_in, uid, in->uid); - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - - err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); - if (!err) - srq->uid = in->uid; - - kvfree(create_in); - return err; -} - -static int destroy_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(modify_rmp_in, in, uid, srq->uid); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - return err; -} - -static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 *rmp_out; - void *rmpc; - int err; - - rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); - if (!rmp_out) - return -ENOMEM; - - err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); - if (err) - goto out; - - rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); - if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(rmp_out); - return err; -} - -static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; - void *create_in; - void *xrqc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = 
MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); - wq = MLX5_ADDR_OF(xrqc, xrqc, wq); - - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); - - if (in->type == IB_SRQT_TM) { - MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); - if (in->flags & MLX5_SRQ_FLAG_RNDV) - MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); - MLX5_SET(xrqc, xrqc, - tag_matching_topology_context.log_matching_list_sz, - in->tm_log_list_size); - } - MLX5_SET(xrqc, xrqc, user_index, in->user_index); - MLX5_SET(xrqc, xrqc, cqn, in->cqn); - MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); - MLX5_SET(create_xrq_in, create_in, uid, in->uid); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; - - MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); - MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_xrq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); - MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, in, lwm, lwm); - MLX5_SET(arm_rq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; - u32 *xrq_out; - int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); - void *xrqc; - int err; - - xrq_out = kvzalloc(outlen, GFP_KERNEL); - if (!xrq_out) - return -ENOMEM; - - MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); - MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); - if (err) - goto out; - - xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); - get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); - if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - out->tm_next_tag = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.append_next_index); - out->tm_hw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.hw_phase_cnt); - out->tm_sw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.sw_phase_cnt); - -out: - kvfree(xrq_out); - return err; -} - -static int create_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - if (!dev->issi) - return create_srq_cmd(dev, srq, in); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return create_xrc_srq_cmd(dev, srq, in); - case MLX5_RES_XRQ: - return create_xrq_cmd(dev, srq, in); - default: - return create_rmp_cmd(dev, srq, in); - } -} - -static int destroy_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - if (!dev->issi) - return destroy_srq_cmd(dev, srq); - switch 
(srq->common.res) { - case MLX5_RES_XSRQ: - return destroy_xrc_srq_cmd(dev, srq); - case MLX5_RES_XRQ: - return destroy_xrq_cmd(dev, srq); - default: - return destroy_rmp_cmd(dev, srq); - } -} - -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - int err; - struct mlx5_srq_table *table = &dev->priv.srq_table; - - switch (in->type) { - case IB_SRQT_XRC: - srq->common.res = MLX5_RES_XSRQ; - break; - case IB_SRQT_TM: - srq->common.res = MLX5_RES_XRQ; - break; - default: - srq->common.res = MLX5_RES_SRQ; - } - - err = create_srq_split(dev, srq, in); - if (err) - return err; - - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); - if (err) - goto err_destroy_srq_split; - - return 0; - -err_destroy_srq_split: - destroy_srq_split(dev, srq); - - return err; -} -EXPORT_SYMBOL(mlx5_core_create_srq); - -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *tmp; - int err; - - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); - if (!tmp || tmp != srq) - return -EINVAL; - - err = destroy_srq_split(dev, srq); - if (err) - return err; - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); - - return 0; -} -EXPORT_SYMBOL(mlx5_core_destroy_srq); - -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - if (!dev->issi) - return query_srq_cmd(dev, srq, out); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return query_xrc_srq_cmd(dev, srq, out); - case MLX5_RES_XRQ: - return query_xrq_cmd(dev, srq, out); - default: - return query_rmp_cmd(dev, srq, out); - } -} -EXPORT_SYMBOL(mlx5_core_query_srq); - -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - if (!dev->issi) - return arm_srq_cmd(dev, srq, lwm, is_srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return arm_xrc_srq_cmd(dev, srq, lwm); - case MLX5_RES_XRQ: - return arm_xrq_cmd(dev, srq, lwm); - default: - return arm_rmp_cmd(dev, srq, lwm); - } -} -EXPORT_SYMBOL(mlx5_core_arm_srq); - void mlx5_init_srq_table(struct mlx5_core_dev *dev) { struct mlx5_srq_table *table = &dev->priv.srq_table; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index ab482124e901..c4d4b76096dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -258,49 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) } EXPORT_SYMBOL(mlx5_core_destroy_tis); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn) -{ - u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; - int err; - - MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *rmpn = MLX5_GET(create_rmp_out, out, rmpn); - - return err; -} - -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) -{ - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; - - MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); -} - -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 
rmpn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, - sizeof(out)); -} - -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; - int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - - MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); - MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27a481b159ed..1096da4fb368 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -904,13 +904,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in); -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out); -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq); void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -942,7 +935,6 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 77bc4264066d..9343306cd188 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -6,37 +6,6 @@ #ifndef MLX5_SRQ_H #define MLX5_SRQ_H -#include - -enum { - MLX5_SRQ_FLAG_ERR = (1 << 0), - MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), - MLX5_SRQ_FLAG_RNDV = (1 << 2), -}; - -struct mlx5_srq_attr { - u32 type; - u32 flags; - u32 log_size; - u32 wqe_shift; - u32 log_page_size; - u32 wqe_cnt; - u32 srqn; - u32 xrcd; - u32 page_offset; - u32 cqn; - u32 pd; - u32 lwm; - u32 user_index; - u64 db_record; - __be64 *pas; - u32 tm_log_list_size; - u32 tm_next_tag; - u32 tm_hw_phase_cnt; - u32 tm_sw_phase_cnt; - u16 uid; -}; - struct mlx5_core_dev; void mlx5_init_srq_table(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 39ebb699875b..a261d5528ff7 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -58,12 +58,6 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, int inlen); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn); -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); -int 
mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, -- cgit v1.2.3 From c48d386b2bbb5b7546a409afb611346ba9520f73 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:38 +0200 Subject: RDMA/mlx5: Remove SRQ signature global flag SRQ signature is not supported, hence there is no need for a special static global variable to announce it. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/srq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index a86d9f153805..154fd0d3718e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -12,9 +12,6 @@ #include "mlx5_ib.h" #include "srq.h" -/* not supported currently */ -static int srq_signature; - static void *get_wqe(struct mlx5_ib_srq *srq, int n) { return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); @@ -175,7 +172,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_in; } - srq->wq_sig = !!srq_signature; + srq->wq_sig = 0; in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && -- cgit v1.2.3 From 81773ce5f07fd4c3acd4bb7e918ce03a4e652a5f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:39 +0200 Subject: RDMA/mlx5: Use stages for callback to setup and release DEVX Reuse the existing infrastructure to initialize and release the DEVX uid. The DevX interface is intended for user space access, so it is supposed to be initialized before ib_register_device(). DevX also isn't supported in switchdev mode, so there is no need to initialize it in that mode.
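As background, a minimal sketch of the staged-profile pattern being reused here (illustrative names only; the driver's real loop is the __mlx5_ib_add()/__mlx5_ib_remove() pair visible in the diff below):

    struct stage {
            int  (*init)(struct mlx5_ib_dev *dev);
            void (*cleanup)(struct mlx5_ib_dev *dev);
    };

    /* Run init callbacks in order; on failure, unwind the stages that
     * already completed by calling their cleanup callbacks in reverse. */
    static int run_stages(struct mlx5_ib_dev *dev,
                          const struct stage *stages, int n)
    {
            int i, err;

            for (i = 0; i < n; i++) {
                    if (stages[i].init) {
                            err = stages[i].init(dev);
                            if (err)
                                    goto unwind;
                    }
            }
            return 0;

    unwind:
            while (--i >= 0)
                    if (stages[i].cleanup)
                            stages[i].cleanup(dev);
            return err;
    }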
Fixes: 76dc5a8406bf ("IB/mlx5: Manage device uid for DEVX white list commands") Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 26 +++++++++++++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e6af18e7d82..56472fa3e18b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6237,6 +6237,22 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); } +static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) +{ + int uid; + + uid = mlx5_ib_devx_create(dev); + if (uid > 0) + dev->devx_whitelist_uid = uid; + + return 0; +} +static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) +{ + if (dev->devx_whitelist_uid) + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -6248,8 +6264,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, profile->stage[stage].cleanup(dev); } - if (dev->devx_whitelist_uid) - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); ib_dealloc_device((struct ib_device *)dev); } @@ -6258,7 +6272,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, { int err; int i; - int uid; for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -6268,10 +6281,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - uid = mlx5_ib_devx_create(dev); - if (uid > 0) - dev->devx_whitelist_uid = uid; - dev->profile = profile; dev->ib_active = true; @@ -6326,6 +6335,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, NULL), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7326fb8710e0..332d5c4d8ab3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -783,6 +783,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, MLX5_IB_STAGE_SPECS, + MLX5_IB_STAGE_WHITELIST_UID, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, -- cgit v1.2.3 From b4990804e1a46321806df6b56e3f04f1803a03d9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:40 +0200 Subject: RDMA/mlx5: Update SRQ functions signatures to mlx5_ib format Reflect the move of the SRQ code from mlx5_core to mlx5_ib by updating the function signatures so they no longer require mlx5_core_dev as an input, because all operations in mlx5_ib are supposed to use mlx5_ib_dev.
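The convention this patch establishes, in a simplified sketch: IB-layer entry points take struct mlx5_ib_dev and only reach through dev->mdev at the point a firmware command is actually issued. This is trimmed from destroy_srq_cmd() in the diff below (the uid field and surrounding logic are omitted here):

    int destroy_srq_cmd_sketch(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
    {
            u32 in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
            u32 out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};

            MLX5_SET(destroy_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_SRQ);
            MLX5_SET(destroy_srq_in, in, srqn, srq->srqn);

            /* only the command execution needs the core device */
            return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
    }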
Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 3 +- drivers/infiniband/hw/mlx5/srq.c | 10 +-- drivers/infiniband/hw/mlx5/srq.h | 18 ++--- drivers/infiniband/hw/mlx5/srq_cmd.c | 129 ++++++++++++++++++----------------- 4 files changed, 83 insertions(+), 77 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index c5d2824ada59..0b99f7d0630d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -178,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(dev->mdev, - be32_to_cpu(cqe->srqn)); + msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 154fd0d3718e..2b184c7f531a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -297,7 +297,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.pd = to_mpd(pd)->pdn; in.db_record = srq->db.dma; - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); @@ -321,7 +321,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_core: - mlx5_core_destroy_srq(dev->mdev, &srq->msrq); + mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: if (pd->uobject) @@ -351,7 +351,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; mutex_lock(&srq->mutex); - ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1); + ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1); mutex_unlock(&srq->mutex); if (ret) @@ -372,7 +372,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (!out) return -ENOMEM; - ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out); + ret = mlx5_cmd_query_srq(dev, &srq->msrq, out); if (ret) goto out_box; @@ -390,7 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); - mlx5_core_destroy_srq(dev->mdev, &msrq->msrq); + mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index f23d5de12973..1110aeaa775e 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -35,12 +35,14 @@ struct mlx5_srq_attr { u16 uid; }; -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in); -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out); -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq); -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +struct mlx5_ib_dev; + +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out); +int mlx5_cmd_arm_srq(struct mlx5_ib_dev 
*dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn); #endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 4a64ad4c9b25..fdb9443f49f0 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -6,6 +6,7 @@ #include #include #include +#include "mlx5_ib.h" #include "srq.h" static int get_pas_size(struct mlx5_srq_attr *in) @@ -77,11 +78,14 @@ static void get_srqc(void *srqc, struct mlx5_srq_attr *in) in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); } -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) { - struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_srq_table *table; struct mlx5_core_srq *srq; + table = &mdev->priv.srq_table; + spin_lock(&table->lock); srq = radix_tree_lookup(&table->tree, srqn); @@ -93,7 +97,7 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) return srq; } -static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; @@ -120,7 +124,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); if (!err) { @@ -131,8 +135,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, return err; } -static int destroy_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) +static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; @@ -142,11 +145,11 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); } -static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; @@ -158,11 +161,11 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_rq_in, srq_in, lwm, lwm); MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); } -static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; @@ -177,8 +180,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_srq_in, srq_in, opcode, MLX5_CMD_OP_QUERY_SRQ); MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, 
MLX5_ST_SZ_BYTES(query_srq_out)); + err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); if (err) goto out; @@ -191,7 +194,7 @@ out: return err; } -static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, +static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -221,7 +224,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_CMD_OP_CREATE_XRC_SRQ); memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, sizeof(create_out)); if (err) goto out; @@ -233,7 +236,7 @@ out: return err; } -static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, +static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; @@ -244,12 +247,12 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); } -static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, u16 lwm) +static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) { u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; @@ -260,11 +263,11 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); } -static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, +static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { @@ -282,8 +285,8 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_XRC_SRQ); MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out)); if (err) goto out; @@ -298,39 +301,39 @@ out: return err; } -static int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, +static int mlx5_core_create_rmp(struct mlx5_ib_dev *dev, u32 *in, int inlen, u32 *rmpn) { u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = { 0 }; int err; MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out)); if (!err) *rmpn = MLX5_GET(create_rmp_out, out, rmpn); return err; } -static int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +static int mlx5_core_modify_rmp(struct mlx5_ib_dev *dev, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out)); } -static int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +static int mlx5_core_query_rmp(struct mlx5_ib_dev *dev, u32 rmpn, u32 *out) { u32 
in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen); } -static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { void *create_in; @@ -362,8 +365,7 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, return err; } -static int destroy_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) +static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; @@ -371,11 +373,10 @@ static int destroy_rmp_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -static int arm_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, +static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { void *in; @@ -405,7 +406,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, return err; } -static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { u32 *rmp_out; @@ -430,7 +431,7 @@ out: return err; } -static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; @@ -465,7 +466,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(xrqc, xrqc, cqn, in->cqn); MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); MLX5_SET(create_xrq_in, create_in, uid, in->uid); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); if (!err) { @@ -476,7 +477,7 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, return err; } -static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; @@ -485,10 +486,10 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -static int arm_xrq_cmd(struct mlx5_core_dev *dev, +static int arm_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { @@ -501,10 +502,10 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_rq_in, in, lwm, lwm); MLX5_SET(arm_rq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -static int 
query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, +static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; @@ -520,7 +521,7 @@ static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen); if (err) goto out; @@ -543,11 +544,10 @@ out: return err; } -static int create_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, +static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { - if (!dev->issi) + if (!dev->mdev->issi) return create_srq_cmd(dev, srq, in); switch (srq->common.res) { case MLX5_RES_XSRQ: @@ -559,10 +559,9 @@ static int create_srq_split(struct mlx5_core_dev *dev, } } -static int destroy_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) +static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - if (!dev->issi) + if (!dev->mdev->issi) return destroy_srq_cmd(dev, srq); switch (srq->common.res) { case MLX5_RES_XSRQ: @@ -574,11 +573,14 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, } } -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) { + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_srq_table *table; int err; - struct mlx5_srq_table *table = &dev->priv.srq_table; + + table = &mdev->priv.srq_table; switch (in->type) { case IB_SRQT_XRC: @@ -612,12 +614,15 @@ err_destroy_srq_split: return err; } -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_srq_table *table; struct mlx5_core_srq *tmp; int err; + table = &mdev->priv.srq_table; + spin_lock_irq(&table->lock); tmp = radix_tree_delete(&table->tree, srq->srqn); spin_unlock_irq(&table->lock); @@ -635,10 +640,10 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) return 0; } -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) { - if (!dev->issi) + if (!dev->mdev->issi) return query_srq_cmd(dev, srq, out); switch (srq->common.res) { case MLX5_RES_XSRQ: @@ -650,10 +655,10 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, } } -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) { - if (!dev->issi) + if (!dev->mdev->issi) return arm_srq_cmd(dev, srq, lwm, is_srq); switch (srq->common.res) { case MLX5_RES_XSRQ: -- cgit v1.2.3 From f3da6577da67a3cd44610ca54e308c6838c92157 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:41 +0200 Subject: RDMA/mlx5: Initialize SRQ tables on mlx5_ib Transfer initialization and cleanup from mlx5_priv struct of mlx5_core_dev to be part of mlx5_ib_dev. 
This completes removal of SRQ from mlx5_core. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ib_rep.c | 4 ++ drivers/infiniband/hw/mlx5/main.c | 7 +++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/srq.c | 1 - drivers/infiniband/hw/mlx5/srq.h | 25 ++++++++ drivers/infiniband/hw/mlx5/srq_cmd.c | 72 ++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 -- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 63 --------------------- include/linux/mlx5/driver.h | 25 -------- include/linux/mlx5/srq.h | 14 ----- 11 files changed, 101 insertions(+), 122 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/srq.c delete mode 100644 include/linux/mlx5/srq.h diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 584ff2ea7810..8a682d86d634 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -4,6 +4,7 @@ */ #include "ib_rep.h" +#include "srq.h" static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, @@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 56472fa3e18b..96515a8c9d2c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -60,6 +60,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "srq.h" #include #include #include @@ -6308,6 +6309,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_roce_init, mlx5_ib_stage_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), @@ -6365,6 +6369,9 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 332d5c4d8ab3..861b68f2e330 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -50,6 +49,8 @@ #include #include +#include "srq.h" + #define mlx5_ib_dbg(_dev, format, arg...) 
\ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) @@ -774,6 +775,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, + MLX5_IB_STAGE_SRQ, MLX5_IB_STAGE_DEVICE_RESOURCES, MLX5_IB_STAGE_DEVICE_NOTIFIER, MLX5_IB_STAGE_ODP, @@ -942,6 +944,7 @@ struct mlx5_ib_dev { u64 sys_image_guid; struct mlx5_memic memic; u16 devx_whitelist_uid; + struct mlx5_srq_table srq_table; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 2b184c7f531a..91dcd3918d96 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 1110aeaa775e..75eb5839ae95 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -37,6 +37,28 @@ struct mlx5_srq_attr { struct mlx5_ib_dev; +struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ + u32 srqn; + int max; + size_t max_gs; + size_t max_avail_gather; + int wqe_shift; + void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); + + atomic_t refcount; + struct completion free; + u16 uid; +}; + +struct mlx5_srq_table { + struct notifier_block nb; + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); @@ -45,4 +67,7 @@ int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn); + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev); #endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index fdb9443f49f0..6be89c6be40f 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -80,12 +80,9 @@ static void get_srqc(void *srqc, struct mlx5_srq_attr *in) struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) { - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_srq_table *table; + struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - table = &mdev->priv.srq_table; - spin_lock(&table->lock); srq = radix_tree_lookup(&table->tree, srqn); @@ -576,12 +573,9 @@ static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_srq_table *table; + struct mlx5_srq_table *table = &dev->srq_table; int err; - table = &mdev->priv.srq_table; - switch (in->type) { case IB_SRQT_XRC: srq->common.res = MLX5_RES_XSRQ; @@ -616,13 +610,10 @@ err_destroy_srq_split: int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_srq_table *table; + struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - table = &mdev->priv.srq_table; - spin_lock_irq(&table->lock); tmp = radix_tree_delete(&table->tree, srq->srqn); 
spin_unlock_irq(&table->lock); @@ -669,3 +660,60 @@ int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, return arm_rmp_cmd(dev, srq, lwm); } } + +static int srq_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + struct mlx5_core_srq *srq; + struct mlx5_eqe *eqe; + u32 srqn; + + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) + return NOTIFY_OK; + + srq->event(srq, eqe->type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + + return NOTIFY_OK; +} + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev->mdev, &table->nb); + + return 0; +} + +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + mlx5_notifier_unregister(dev->mdev, &table->nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26afe0779a0c..d499b3d00348 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5 core basic # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4bc27a073dc4..778995573812 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -749,8 +748,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); mlx5_init_reserved_gids(dev); @@ -804,7 +801,6 @@ err_rl_cleanup: err_tables_cleanup: mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); err_events_cleanup: @@ -828,7 +824,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c deleted file mode 100644 index 0e80ddbe2510..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* - * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. 
- */ - -#include -#include -#include - -static int srq_event_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_srq_table *table; - struct mlx5_core_srq *srq; - struct mlx5_eqe *eqe; - u32 srqn; - - if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && - type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) - return NOTIFY_DONE; - - table = container_of(nb, struct mlx5_srq_table, nb); - - eqe = data; - srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - if (!srq) - return NOTIFY_OK; - - srq->event(srq, eqe->type); - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - - return NOTIFY_OK; -} - -void mlx5_init_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); - - table->nb.notifier_call = srq_event_notifier; - mlx5_notifier_register(dev, &table->nb); -} - -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - mlx5_notifier_unregister(dev, &table->nb); -} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1096da4fb368..584d8a5df7eb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -50,7 +50,6 @@ #include #include -#include #include #include #include @@ -393,20 +392,6 @@ struct mlx5_core_rsc_common { struct completion free; }; -struct mlx5_core_srq { - struct mlx5_core_rsc_common common; /* must be first */ - u32 srqn; - int max; - size_t max_gs; - size_t max_avail_gather; - int wqe_shift; - void (*event) (struct mlx5_core_srq *, enum mlx5_event); - - atomic_t refcount; - struct completion free; - u16 uid; -}; - struct mlx5_uars_page { void __iomem *map; bool wc; @@ -464,14 +449,6 @@ struct mlx5_qp_table { struct radix_tree_root tree; }; -struct mlx5_srq_table { - struct notifier_block nb; - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; -}; - struct mlx5_mkey_table { /* protect radix tree */ @@ -547,8 +524,6 @@ struct mlx5_priv { struct mlx5_core_health health; - struct mlx5_srq_table srq_table; - /* start: qp staff */ struct mlx5_qp_table qp_table; struct dentry *qp_debugfs; diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h deleted file mode 100644 index 9343306cd188..000000000000 --- a/include/linux/mlx5/srq.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* - * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. - */ - -#ifndef MLX5_SRQ_H -#define MLX5_SRQ_H - -struct mlx5_core_dev; - -void mlx5_init_srq_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev); - -#endif /* MLX5_SRQ_H */ -- cgit v1.2.3 From a1eb180238a71797d84bd91acfb1de1d632623c5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:42 +0200 Subject: RDMA/mlx5: Unfold create RMP function There is no need to perform create_rmp in two separate functions, where one uses the stack as a placeholder for data while the other allocates it dynamically. Combine the two functions into one.
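The allocation idiom the unfolded function adopts recurs in the following patch as well: both mailboxes come from kvzalloc() and share a single exit path, which is safe because kvfree(NULL) is a no-op. A minimal sketch, with inlen/outlen standing in for the command-specific sizes:

    static int cmd_with_two_mailboxes(int inlen, int outlen)
    {
            void *in, *out;
            int err = 0;

            in = kvzalloc(inlen, GFP_KERNEL);
            out = kvzalloc(outlen, GFP_KERNEL);
            if (!in || !out) {
                    err = -ENOMEM;
                    goto done;
            }

            /* ... build the command in 'in', execute it, parse 'out' ... */

    done:
            kvfree(in);     /* kvfree() tolerates NULL, so one exit path suffices */
            kvfree(out);
            return err;
    }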
Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/srq_cmd.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 6be89c6be40f..955df638b416 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -298,20 +298,6 @@ out: return err; } -static int mlx5_core_create_rmp(struct mlx5_ib_dev *dev, u32 *in, int inlen, - u32 *rmpn) -{ - u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = { 0 }; - int err; - - MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out)); - if (!err) - *rmpn = MLX5_GET(create_rmp_out, out, rmpn); - - return err; -} - static int mlx5_core_modify_rmp(struct mlx5_ib_dev *dev, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; @@ -333,18 +319,24 @@ static int mlx5_core_query_rmp(struct mlx5_ib_dev *dev, u32 rmpn, u32 *out) static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { - void *create_in; + void *create_out = NULL; + void *create_in = NULL; void *rmpc; void *wq; int pas_size; + int outlen; int inlen; int err; pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + outlen = MLX5_ST_SZ_BYTES(create_rmp_out); create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; + create_out = kvzalloc(outlen, GFP_KERNEL); + if (!create_in || !create_out) { + err = -ENOMEM; + goto out; + } rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); wq = MLX5_ADDR_OF(rmpc, rmpc, wq); @@ -354,11 +346,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); - if (!err) + MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); + if (!err) { + srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn); srq->uid = in->uid; + } +out: kvfree(create_in); + kvfree(create_out); return err; } -- cgit v1.2.3 From 36ff48805ae457d79886147913ad07056780dca0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:43 +0200 Subject: RDMA/mlx5: Unfold modify RMP function There is no need to perform modify_rmp in two separate functions, where one uses the stack as a placeholder for data while the other allocates it dynamically. Combine the two functions into a single call.
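For context on why the split existed at all, note the two sizing macros in play: a small, fixed-size response can live on the stack as a dword array via MLX5_ST_SZ_DW(), while a request that may be large is sized in bytes via MLX5_ST_SZ_BYTES() and allocated with kvzalloc(). A hedged two-line illustration:

    u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {};        /* small: stack, dwords */
    void *in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);  /* large: heap, bytes */

Unfolding removes the split so both buffers follow the same dynamically allocated pattern, as the diff below shows.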
Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/srq_cmd.c | 62 ++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 955df638b416..7aaaffbd4afa 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -298,24 +298,6 @@ out: return err; } -static int mlx5_core_modify_rmp(struct mlx5_ib_dev *dev, u32 *in, int inlen) -{ - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; - - MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out)); -} - -static int mlx5_core_query_rmp(struct mlx5_ib_dev *dev, u32 rmpn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; - int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - - MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); - MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen); -} - static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -373,15 +355,24 @@ static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - void *in; + void *out = NULL; + void *in = NULL; void *rmpc; void *wq; void *bitmask; + int outlen; + int inlen; int err; - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; + inlen = MLX5_ST_SZ_BYTES(modify_rmp_in); + outlen = MLX5_ST_SZ_BYTES(modify_rmp_out); + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); @@ -393,25 +384,39 @@ static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(wq, wq, lwm, lwm); MLX5_SET(rmp_bitmask, bitmask, lwm, 1); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); +out: kvfree(in); + kvfree(out); return err; } static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 *rmp_out; + u32 *rmp_out = NULL; + u32 *rmp_in = NULL; void *rmpc; + int outlen; + int inlen; int err; - rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); - if (!rmp_out) - return -ENOMEM; + outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + inlen = MLX5_ST_SZ_BYTES(query_rmp_in); + + rmp_out = kvzalloc(outlen, GFP_KERNEL); + rmp_in = kvzalloc(inlen, GFP_KERNEL); + if (!rmp_out || !rmp_in) { + err = -ENOMEM; + goto out; + } - err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen); if (err) goto out; @@ -422,6 +427,7 @@ static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, out: kvfree(rmp_out); + kvfree(rmp_in); return err; } -- cgit v1.2.3 From 9d43faac02e3a4a26171f96f4de69fa650d3b6f6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 26 Nov 2018 08:28:32 +0200 Subject: net/mlx5: Update mlx5_ifc with DEVX UCTX capabilities bits Expose device capabilities for DEVX user context, it 
includes which capabilities the device supports and a matching bit to set as part of user context creation. Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f64e814cc10..ece1b606c909 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -883,6 +883,10 @@ enum { MLX5_CAP_UMR_FENCE_NONE = 0x2, }; +enum { + MLX5_UCTX_CAP_RAW_TX = 1UL << 0, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1193,7 +1197,13 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_vhca_ports[0x8]; u8 reserved_at_618[0x6]; u8 sw_owner_id[0x1]; - u8 reserved_at_61f[0x1e1]; + u8 reserved_at_61f[0x1]; + + u8 reserved_at_620[0x80]; + + u8 uctx_cap[0x20]; + + u8 reserved_at_6c0[0x140]; }; enum mlx5_flow_destination_type { @@ -9276,7 +9286,9 @@ struct mlx5_ifc_umem_bits { struct mlx5_ifc_uctx_bits { u8 modify_field_select[0x40]; - u8 reserved_at_40[0x1c0]; + u8 cap[0x20]; + + u8 reserved_at_60[0x1a0]; }; struct mlx5_ifc_create_umem_in_bits { -- cgit v1.2.3 From 3fd3c80acc172fcaab2356c15e5e3c05758a9d51 Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Fri, 30 Nov 2018 13:22:04 +0200 Subject: net/mlx5: Expose packet based credit mode The packet-based credit mode bit determines whether the credit mode works per message or per packet. Expose the QP creation flag and the HCA capability. Signed-off-by: Danit Goldberg Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ece1b606c909..91d6e85e3cef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1047,7 +1047,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vector_calc[0x1]; u8 umr_ptr_rlky[0x1]; u8 imaicl[0x1]; - u8 reserved_at_232[0x4]; + u8 qp_packet_based[0x1]; + u8 reserved_at_233[0x3]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; @@ -2259,7 +2260,8 @@ struct mlx5_ifc_qpc_bits { u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; - u8 reserved_at_15[0x3]; + u8 reserved_at_15[0x1]; + u8 req_e2e_credit_mode[0x2]; u8 offload_type[0x4]; u8 end_padding_mode[0x2]; u8 reserved_at_1e[0x2]; -- cgit v1.2.3 From fe206c209314f3cebe232518db9b5ad2606e34f1 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 4 Dec 2018 18:03:00 -0800 Subject: net/mlx5: When fetching CQEs return CQE instead of void pointer The function is only used to retrieve CQEs, so use the proper type as the return value.
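The effect on callers, in a hypothetical two-liner (the cast-free field access is what the typed return buys; the opcode extraction shown is the same one a later patch in this series wraps in a helper):

    struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ix);
    u8 opcode = cqe->op_own >> 4;   /* field access is now type-checked */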
Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index b1293d153a58..9bc2184a46bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -177,7 +177,7 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) return mlx5_cqwq_ctr2ix(wq, wq->cc); } -static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { return mlx5_frag_buf_get_wqe(&wq->fbc, ix); } -- cgit v1.2.3 From 6254adeb1f6943a66cbed892a5f683400b9db194 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 4 Dec 2018 18:03:01 -0800 Subject: net/mlx5: Use helper to get CQE opcode Introduce and use a helper that extracts the opcode from a CQE (completion queue entry) structure. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 2 +- include/linux/mlx5/device.h | 5 +++++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 79638dcbae78..31956ddd394e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -554,9 +554,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, mlx5_cqwq_pop(&cq->wq); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own); + "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); return; } @@ -898,7 +898,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -930,7 +930,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u16 byte_cnt = cqe_bcnt - headlen; struct sk_buff *skb; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -1148,7 +1148,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi->consumed_strides += cstrides; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; goto mpwrq_cqe_out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 6dacaeba2fbf..46b5a6914d71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -507,7 +507,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { mlx5e_dump_error_cqe(sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 8ca1d1949d93..873541ef4c1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, { u8 opcode, status = 0; - opcode = cqe->op_own >> 4; + opcode = get_cqe_opcode(cqe); switch (opcode) { case MLX5_CQE_REQ_ERR: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f7c8bebfe472..c66867c8fc2f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -781,6 +781,11 @@ static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) return (cqe->op_own >> 2) & 0x3; } +static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) +{ + return cqe->op_own >> 4; +} + static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; -- cgit v1.2.3 From bdefffd13bf294a5a0c16255650020bff98954c7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 4 Dec 2018 18:03:02 -0800 Subject: IB/mlx5: Use helper to get CQE opcode Use the new helper that extracts the opcode from a CQE (completion queue entry) structure. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Acked-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 0b99f7d0630d..26ab9041f94a 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -82,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { return cqe; } else { @@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } wc->byte_len = be32_to_cpu(cqe->byte_cnt); - switch (cqe->op_own >> 4) { + switch (get_cqe_opcode(cqe)) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; @@ -537,7 +537,7 @@ repoll: */ rmb(); - opcode = cqe64->op_own >> 4; + opcode = get_cqe_opcode(cqe64); if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { if (likely(cq->resize_buf)) { free_cq_buf(dev, &cq->buf); @@ -1295,7 +1295,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq) return -EINVAL; } - while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, (i + 1) & cq->resize_buf->nent); dcqe64 = dsize == 64 ? dcqe : dcqe + 64; -- cgit v1.2.3 From 7300375f181e06ba98183509065c6b71bead442d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 4 Dec 2018 18:03:03 -0800 Subject: net/mlx5: Move flow counters data structures from flow steering header After the following flow counters API refactoring: ("net/mlx5: Use flow counter IDs and not the wrapping cache object") flow counters private data structures mlx5_fc_cache and mlx5_fc are redundantly exposed in fs_core.h, they have nothing to do with flow steering core and they are private to fs_counter.c, this patch moves them to where they belong and reduces their exposure in the driver. 
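The move follows the standard C encapsulation idiom: the header keeps only a forward declaration, and the full layout lives in the one .c file that needs it, so other translation units cannot poke at the internals. A hedged single-file sketch of the pattern, with made-up names (fc, fc_create) rather than the driver's:

#include <stdint.h>
#include <stdlib.h>

/* what would live in the header: an opaque handle and accessors */
struct fc;
struct fc *fc_create(uint32_t id);
uint64_t fc_packets(const struct fc *fc);

/* what would live in the .c file: the private layout */
struct fc {
        uint32_t id;
        uint64_t packets;
        uint64_t bytes;
};

struct fc *fc_create(uint32_t id)
{
        struct fc *fc = calloc(1, sizeof(*fc));

        if (fc)
                fc->id = id;
        return fc;
}

uint64_t fc_packets(const struct fc *fc)
{
        return fc->packets;
}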
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 23 ---------------------- .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 23 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b51ad217da32..2dc86347af58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -145,29 +145,6 @@ struct mlx5_flow_table { struct rhltable fgs_hash; }; -struct mlx5_fc_cache { - u64 packets; - u64 bytes; - u64 lastuse; -}; - -struct mlx5_fc { - struct list_head list; - struct llist_node addlist; - struct llist_node dellist; - - /* last{packets,bytes} members are used when calculating the delta since - * last reading - */ - u64 lastpackets; - u64 lastbytes; - - u32 id; - bool aging; - - struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; -}; - struct mlx5_ft_underlay_qp { struct list_head list; u32 qpn; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 32accd6b041b..c6c28f56aa29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -41,6 +41,29 @@ /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + struct llist_node addlist; + struct llist_node dellist; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u32 id; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad -- cgit v1.2.3 From c2fb3db22d35f01774d0547b1d8e5085df193646 Mon Sep 17 00:00:00 2001 From: Mikhael Goikhman Date: Mon, 10 Dec 2018 13:15:09 -0800 Subject: net/mlx5: Rework handling of port module events Add explicit HW defined error values. For simplicity, keep counters for all statuses starting from 0, although currently status=0 is not used. Additionally, when HW signals an unexpected cable status, it is reported now rather than ignored. And status counter is now updated on errors. 
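The hardened pattern is worth spelling out: counters are indexed by the raw HW value (starting from 0) and bumped only after a bounds check, so an unexpected value is reported instead of indexing out of range. A small stand-alone sketch of that pattern, with illustrative values rather than the driver's arrays:

#include <stdio.h>

enum status { ST_PLUGGED = 1, ST_UNPLUGGED = 2, ST_ERROR = 3, ST_NUM };

static unsigned long status_counters[ST_NUM];

static void account_status(unsigned int status)
{
        if (status < ST_NUM)
                status_counters[status]++;      /* keyed by the raw HW value */
        else
                printf("unexpected status 0x%x\n", status); /* report, don't ignore */
}

int main(void)
{
        account_status(ST_UNPLUGGED);
        account_status(0x7);    /* out of range: reported rather than miscounted */
        return 0;
}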
Signed-off-by: Mikhael Goikhman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 8 +-- drivers/net/ethernet/mellanox/mlx5/core/events.c | 83 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 19 +++-- 3 files changed, 65 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 748d23806391..881c54c12e19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1087,13 +1087,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) } static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_unplug", 8 }, + { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED }, }; static const struct counter_desc mlx5e_pme_error_desc[] = { - { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_high_temp", 48 }, /* high temperature */ - { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ + { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK }, + { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE }, + { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE }, }; #define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index e92df7020a26..587d93ec905f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -157,23 +157,43 @@ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) } /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ -static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { - "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ - "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ - "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ -}; +static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) +{ + switch (status) { + case MLX5_MODULE_STATUS_PLUGGED: + return "Cable plugged"; + case MLX5_MODULE_STATUS_UNPLUGGED: + return "Cable unplugged"; + case MLX5_MODULE_STATUS_ERROR: + return "Cable error"; + default: + return "Unknown status"; + } +} -static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { - "Power budget exceeded", - "Long Range for non MLNX cable", - "Bus stuck(I2C or data shorted)", - "No EEPROM/retry timeout", - "Enforce part number list", - "Unknown identifier", - "High Temperature", - "Bad or shorted cable/module", - "Unknown status", -}; +static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) +{ + switch (error) { + case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: + return "Power budget exceeded"; + case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: + return "Long Range for non MLNX cable"; + case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: + return "Bus stuck (I2C or data shorted)"; + case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: + return "No EEPROM/retry timeout"; + case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: + return "Enforce part number list"; + case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: + return "Unknown identifier"; + case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: + return "High Temperature"; + case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: + return "Bad or shorted cable/module"; + default: + return "Unknown error"; + } +} 
/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ static int port_module(struct notifier_block *nb, unsigned long type, void *data) @@ -185,6 +205,7 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data enum port_module_event_status_type module_status; enum port_module_event_error_type error_type; struct mlx5_eqe_port_module *module_event_eqe; + const char *status_str, *error_str; u8 module_num; module_event_eqe = &eqe->data.port_module; @@ -193,28 +214,28 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = module_event_eqe->error_type & PORT_MODULE_EVENT_ERROR_TYPE_MASK; - if (module_status < MLX5_MODULE_STATUS_ERROR) { - events->pme_stats.status_counters[module_status - 1]++; - } else if (module_status == MLX5_MODULE_STATUS_ERROR) { - if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) - /* Unknown error type */ - error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; - events->pme_stats.error_counters[error_type]++; + + if (module_status < MLX5_MODULE_STATUS_NUM) + events->pme_stats.status_counters[module_status]++; + status_str = mlx5_pme_status_to_string(module_status); + + if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + events->pme_stats.error_counters[error_type]++; + error_str = mlx5_pme_error_to_string(error_type); } if (!printk_ratelimit()) return NOTIFY_OK; - if (module_status < MLX5_MODULE_STATUS_ERROR) + if (module_status == MLX5_MODULE_STATUS_ERROR) + mlx5_core_err(events->dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, status_str, error_str); + else mlx5_core_info(events->dev, "Port module event: module %u, %s\n", - module_num, mlx5_pme_status[module_status - 1]); - - else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(events->dev, - "Port module event[error]: module %u, %s, %s\n", - module_num, mlx5_pme_status[module_status - 1], - mlx5_pme_error[error_type]); + module_num, status_str); return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 4d78a459676e..af19fa61e9ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -51,19 +51,18 @@ enum port_module_event_status_type { MLX5_MODULE_STATUS_PLUGGED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, - MLX5_MODULE_STATUS_NUM = 0x3, + MLX5_MODULE_STATUS_NUM, }; enum port_module_event_error_type { - MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, - MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, - MLX5_MODULE_EVENT_ERROR_BUS_STUCK, - MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, - MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, - MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, - MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, - MLX5_MODULE_EVENT_ERROR_BAD_CABLE, - MLX5_MODULE_EVENT_ERROR_UNKNOWN, + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, MLX5_MODULE_EVENT_ERROR_NUM, }; -- cgit v1.2.3 From 37a12aae06fda00f36eadb168d5d7ba0be3a8c86 Mon Sep 17 00:00:00 2001 From: Mikhael Goikhman Date: Mon, 
10 Dec 2018 13:15:10 -0800 Subject: net/mlx5: Add support for PCIe power slot exceeded error in PME Support a new hardware error type in port module events: - error_type=0xc (PCIe system power slot exceeded) Signed-off-by: Mikhael Goikhman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 587d93ec905f..ba9249bcfcba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -190,6 +190,8 @@ static const char *mlx5_pme_error_to_string(enum port_module_event_error_type er return "High Temperature"; case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: return "Bad or shorted cable/module"; + case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: + return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; default: return "Unknown error"; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index af19fa61e9ef..979970f8fa0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -63,6 +63,7 @@ enum port_module_event_error_type { MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, MLX5_MODULE_EVENT_ERROR_NUM, }; -- cgit v1.2.3 From 8d6b57e6445989e9f34077ef1aac80b621029b08 Mon Sep 17 00:00:00 2001 From: Mikhael Goikhman Date: Mon, 10 Dec 2018 13:15:11 -0800 Subject: net/mlx5: Add support for plugged-disabled cable status in PME Support a new hardware module status in port module events: - module_status=0x4 (Cable plugged, but disabled) Signed-off-by: Mikhael Goikhman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index ba9249bcfcba..900fdd235ba0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -166,6 +166,8 @@ static const char *mlx5_pme_status_to_string(enum port_module_event_status_type return "Cable unplugged"; case MLX5_MODULE_STATUS_ERROR: return "Cable error"; + case MLX5_MODULE_STATUS_DISABLED: + return "Cable disabled"; default: return "Unknown status"; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 979970f8fa0f..397a2847867a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -51,6 +51,7 @@ enum port_module_event_status_type { MLX5_MODULE_STATUS_PLUGGED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, MLX5_MODULE_STATUS_NUM, }; -- cgit v1.2.3 From fd4572b3ff3ff57ca7fa612f9ea42b90afdd8bff Mon Sep 17 00:00:00 2001 From: Eyal Davidovich Date: Mon, 10 Dec 2018 13:15:12 -0800 Subject: net/mlx5: Add monitor commands layout and event data Will be used in downstream patch to monitor counter changes by the HCA and report it to the driver by an event. 
The driver will update its counters cached data accordingly. Signed-off-by: Eyal Davidovich Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 + drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 + include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 87 +++++++++++++++++++++++- 5 files changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 8ab636d59edb..d3125cdf69db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -373,6 +373,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_MONITOR_COUNTER: + case MLX5_CMD_OP_ARM_MONITOR_COUNTER: case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: @@ -522,6 +524,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER); MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 4aa39a1fe23f..ee04aab65a9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -527,6 +527,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); + if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 900fdd235ba0..fbc42b7252a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -117,6 +117,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; case MLX5_EVENT_TYPE_GENERAL_EVENT: return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_MONITOR_COUNTER: + return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; case MLX5_EVENT_TYPE_DEVICE_TRACER: return "MLX5_EVENT_TYPE_DEVICE_TRACER"; default: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index c66867c8fc2f..4674b9e99f45 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -330,6 +330,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_TEMP_WARN_EVENT = 0x17, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, MLX5_EVENT_TYPE_GENERAL_EVENT = 0x22, + MLX5_EVENT_TYPE_MONITOR_COUNTER = 0x24, MLX5_EVENT_TYPE_PPS_EVENT = 0x25, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 91d6e85e3cef..9f7cc26bfb3b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -161,6 +161,8 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, + MLX5_CMD_OP_SET_MONITOR_COUNTER = 0x774, + 
MLX5_CMD_OP_ARM_MONITOR_COUNTER = 0x775, MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, @@ -1200,7 +1202,13 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 sw_owner_id[0x1]; u8 reserved_at_61f[0x1]; - u8 reserved_at_620[0x80]; + u8 max_num_of_monitor_counters[0x10]; + u8 num_ppcnt_monitor_counters[0x10]; + + u8 reserved_at_640[0x10]; + u8 num_q_monitor_counters[0x10]; + + u8 reserved_at_660[0x40]; u8 uctx_cap[0x20]; @@ -3808,6 +3816,83 @@ enum { MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, }; +struct mlx5_ifc_arm_monitor_counter_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_arm_monitor_counter_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum { + MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT = 0x0, + MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER = 0x1, +}; + +enum mlx5_monitor_counter_ppcnt { + MLX5_QUERY_MONITOR_PPCNT_IN_RANGE_LENGTH_ERRORS = 0X0, + MLX5_QUERY_MONITOR_PPCNT_OUT_OF_RANGE_LENGTH_FIELD = 0X1, + MLX5_QUERY_MONITOR_PPCNT_FRAME_TOO_LONG_ERRORS = 0X2, + MLX5_QUERY_MONITOR_PPCNT_FRAME_CHECK_SEQUENCE_ERRORS = 0X3, + MLX5_QUERY_MONITOR_PPCNT_ALIGNMENT_ERRORS = 0X4, + MLX5_QUERY_MONITOR_PPCNT_IF_OUT_DISCARDS = 0X5, +}; + +enum { + MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER = 0X4, +}; + +struct mlx5_ifc_monitor_counter_output_bits { + u8 reserved_at_0[0x4]; + u8 type[0x4]; + u8 reserved_at_8[0x8]; + u8 counter[0x10]; + + u8 counter_group_id[0x20]; +}; + +#define MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 (6) +#define MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 (1) +#define MLX5_CMD_SET_MONITOR_NUM_COUNTER (MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +\ + MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1) + +struct mlx5_ifc_set_monitor_counter_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 num_of_counters[0x10]; + + u8 reserved_at_60[0x20]; + + struct mlx5_ifc_monitor_counter_output_bits monitor_counter[MLX5_CMD_SET_MONITOR_NUM_COUNTER]; +}; + +struct mlx5_ifc_set_monitor_counter_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_query_vport_state_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; -- cgit v1.2.3 From 5886a96ad19dacebe6c4f7f8c001d489b06125dc Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Mon, 10 Dec 2018 13:15:13 -0800 Subject: net/mlx5: Revise gre and nvgre key formats GRE RFC defines a 32 bit key field. NVGRE RFC splits the 32 bit key field to 24 bit VSID (gre_key_h) and 8 bit flow entropy (gre_key_l). Define the two key parsing alternatives in a union, thus enabling both access methods. 
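The ifc union gives two access paths to the same 32 bits. In portable user-space C the same split is usually written with shifts, since bit-field ordering is implementation-defined; a minimal sketch under that assumption:

#include <stdint.h>
#include <stdio.h>

static uint32_t nvgre_vsid(uint32_t key)    { return key >> 8; }   /* hi, 24 bits */
static uint32_t nvgre_entropy(uint32_t key) { return key & 0xff; } /* lo, 8 bits */

int main(void)
{
        uint32_t key = 0x00abcdefu;     /* plain GRE view: all 32 bits at once */

        printf("gre key = 0x%08x\n", (unsigned)key);
        printf("vsid    = 0x%06x\n", (unsigned)nvgre_vsid(key));
        printf("entropy = 0x%02x\n", (unsigned)nvgre_entropy(key));
        return 0;
}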
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 8 ++++---- include/linux/mlx5/mlx5_ifc.h | 13 +++++++++++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 96515a8c9d2c..2560996fce79 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2680,11 +2680,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohs(ib_spec->gre.val.protocol)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.mask.key, sizeof(ib_spec->gre.mask.key)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.val.key, sizeof(ib_spec->gre.val.key)); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0f11fff32a9b..424457ff9759 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p, PRINT_MASKED_VAL(name, p, format); \ } DECLARE_MASK_VAL(u64, gre_key) = { - .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, mask, gre_key_l), - .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo), + .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)}; PRINT_MASKED_VAL(gre_key, p, "%llu"); PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9f7cc26bfb3b..688a549e74f1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -423,6 +423,16 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; }; +struct mlx5_ifc_nvgre_key_bits { + u8 hi[0x18]; + u8 lo[0x8]; +}; + +union mlx5_ifc_gre_key_bits { + struct mlx5_ifc_nvgre_key_bits nvgre; + u8 key[0x20]; +}; + struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_0[0x8]; u8 source_sqn[0x18]; @@ -444,8 +454,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_64[0xc]; u8 gre_protocol[0x10]; - u8 gre_key_h[0x18]; - u8 gre_key_l[0x8]; + union mlx5_ifc_gre_key_bits gre_key; u8 vxlan_vni[0x18]; u8 reserved_at_b8[0x8]; -- cgit v1.2.3 From 1b115498598f25d578cfc0df7b7aea9772bae0a1 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 10 Dec 2018 13:15:14 -0800 Subject: net/mlx5: Introduce extended destination fields Extended destinations provide the ability to configure different encapsulation properties per destination on a single FTE. This is needed for use-cases such as remote mirroring over tunneled networks. 
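Conceptually, the extended format appends per-destination reformat information to each entry in the FTE's destination list. A rough user-space sketch of the two entry shapes; field names and widths are illustrative, not the ifc layout:

#include <stdint.h>

struct dest_entry {                     /* legacy destination entry */
        uint32_t type_and_id;           /* destination_type + destination_id */
        uint32_t vhca_id_bits;
};

struct ext_dest_entry {                 /* extended entry: legacy + encap */
        struct dest_entry entry;
        uint32_t packet_reformat_id;    /* per-destination encap context */
        uint32_t reserved;
};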
Signed-off-by: Eli Britstein Reviewed-by: Or Gerlitz Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 688a549e74f1..60c1d49eb40c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -621,7 +621,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vxlan_encap_decap[0x1]; u8 nvgre_encap_decap[0x1]; - u8 reserved_at_22[0x9]; + u8 reserved_at_22[0x1]; + u8 log_max_fdb_encap_uplink[0x5]; + u8 reserved_at_21[0x3]; u8 log_max_packet_reformat_context[0x5]; u8 reserved_2b[0x6]; u8 max_encap_header_size[0xa]; @@ -1237,8 +1239,10 @@ enum mlx5_flow_destination_type { struct mlx5_ifc_dest_format_struct_bits { u8 destination_type[0x8]; u8 destination_id[0x18]; + u8 destination_eswitch_owner_vhca_id_valid[0x1]; - u8 reserved_at_21[0xf]; + u8 packet_reformat[0x1]; + u8 reserved_at_22[0xe]; u8 destination_eswitch_owner_vhca_id[0x10]; }; @@ -1248,6 +1252,14 @@ struct mlx5_ifc_flow_counter_list_bits { u8 reserved_at_20[0x20]; }; +struct mlx5_ifc_extended_dest_format_bits { + struct mlx5_ifc_dest_format_struct_bits destination_entry; + + u8 packet_reformat_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { struct mlx5_ifc_dest_format_struct_bits dest_format_struct; struct mlx5_ifc_flow_counter_list_bits flow_counter_list; @@ -2469,7 +2481,8 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_60[0x10]; u8 action[0x10]; - u8 reserved_at_80[0x8]; + u8 extended_destination[0x1]; + u8 reserved_at_80[0x7]; u8 destination_list_size[0x18]; u8 reserved_at_a0[0x8]; -- cgit v1.2.3 From aa39c2c0e44d16b5804f8fb6b5350cdf4e33b4c3 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 10 Dec 2018 13:15:15 -0800 Subject: net/mlx5: E-Switch, Change vhca id valid bool field to bit flag Change the driver flow destination struct to use bit flags with the vhca id valid being the 1st one. The flags field is more extendable and will be used in downstream patch. 
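The motivation is extensibility: a flags field absorbs future per-destination attributes as new bits, where a bool would need a new struct member each time. A minimal sketch with made-up names:

#include <stdint.h>

#define DEST_VPORT_VHCA_ID      (1u << 0)       /* replaces bool vhca_id_valid */
#define DEST_VPORT_REFORMAT_ID  (1u << 1)       /* added later, no struct change */

struct vport_dest {
        uint16_t num;
        uint16_t vhca_id;
        uint32_t reformat_id;
        uint8_t  flags;
};

static int has_vhca_id(const struct vport_dest *d)
{
        return !!(d->flags & DEST_VPORT_VHCA_ID);
}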
Signed-off-by: Eli Britstein Reviewed-by: Or Gerlitz Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 +++++--- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 ++- include/linux/mlx5/fs.h | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9eac137790f5..4d7b65df32ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -125,8 +125,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, dest[i].vport.num = attr->out_rep[j]->vport; dest[i].vport.vhca_id = MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = - !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= + MLX5_FLOW_DEST_VPORT_VHCA_ID; i++; } } @@ -220,7 +221,8 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.num = attr->out_rep[i]->vport; dest[i].vport.vhca_id = MLX5_CAP_GEN(attr->out_mdev[i], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 08a891f9aade..dda63dedaa49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -387,7 +387,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, - dst->dest_attr.vport.vhca_id_valid); + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 5660f07d3be0..25ffd8018b72 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -86,6 +86,10 @@ struct mlx5_flow_spec { u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; }; +enum { + MLX5_FLOW_DEST_VPORT_VHCA_ID = BIT(0), +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -96,7 +100,7 @@ struct mlx5_flow_destination { struct { u16 num; u16 vhca_id; - bool vhca_id_valid; + u8 flags; } vport; }; }; -- cgit v1.2.3 From a2c6162b12f15fbbbe38d0eb3a38186bcfc79c0f Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 10 Dec 2018 13:15:16 -0800 Subject: net/mlx5: Support extended destination format in flow steering command Update the flow steering command formatting according to the extended destination API. Note that the FW dictates that multi destination FTEs that involve at least one encap must use the extended destination format, while single destination ones must use the legacy format. Using extended destination format requires FW support. Check for its capabilities and return error if not supported. 
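The selection rule reduces to: use the extended format only for multi-destination FTEs that carry at least one encap, and verify the FW advertises capacity for that many encap destinations. A condensed sketch of just that decision logic, not the driver function itself:

#include <errno.h>

static int choose_dest_format(int num_fwd_dests, int num_encap,
                              int log_max_encap, int *extended)
{
        *extended = num_fwd_dests > 1 && num_encap > 0;

        if (*extended && !log_max_encap)
                return -EOPNOTSUPP;     /* FW lacks extended destinations */
        if (num_encap > (1 << log_max_encap))
                return -EOPNOTSUPP;     /* more encap dests than FW allows */
        return 0;
}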
Signed-off-by: Eli Britstein Reviewed-by: Or Gerlitz Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 80 +++++++++++++++++++++--- include/linux/mlx5/fs.h | 2 + 2 files changed, 75 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index dda63dedaa49..c44ccb67c4a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, + struct fs_fte *fte, bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = + MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + struct mlx5_flow_rule *dst; + int num_encap = 0; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int opmod, int modify_mask, struct mlx5_flow_table *ft, unsigned group_id, struct fs_fte *fte) { - unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + - fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + bool extended_dest = false; struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; + unsigned int inlen; + int dst_cnt_size; void *in_dests; u32 *in; int err; + if (mlx5_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); - MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); + } MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -392,6 
+449,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); + if (extended_dest) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + dst->dest_attr.vport.reformat_id); + } break; default: id = dst->dest_attr.tir_num; @@ -400,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_type, type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } @@ -421,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_counter_list, in_dests, flow_counter_id, dst->dest_attr.counter_id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } if (list_size > max_list_size) { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 25ffd8018b72..9df51da04621 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -88,6 +88,7 @@ struct mlx5_flow_spec { enum { MLX5_FLOW_DEST_VPORT_VHCA_ID = BIT(0), + MLX5_FLOW_DEST_VPORT_REFORMAT_ID = BIT(1), }; struct mlx5_flow_destination { @@ -100,6 +101,7 @@ struct mlx5_flow_destination { struct { u16 num; u16 vhca_id; + u32 reformat_id; u8 flags; } vport; }; -- cgit v1.2.3 From 6c22a11957f46ca7e9b8db20ac7c6b05441c55ed Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 10 Dec 2018 13:15:17 -0800 Subject: net/mlx5: Remove the get protocol device interface entry This isn't used anywhere across the mlx5 driver stack, remove it. 
Signed-off-by: Or Gerlitz Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 22 ---------------------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 -------- include/linux/mlx5/driver.h | 2 -- 3 files changed, 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index d2ed14bc37c3..ebc046fa97d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -256,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) mutex_unlock(&mlx5_intf_mutex); } -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - /* Must be called with intf_mutex held */ void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 56bc41b1c31f..a43092de3cc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5160,20 +5160,12 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) kfree(ppriv); } -static void *mlx5e_get_netdev(void *vpriv) -{ - struct mlx5e_priv *priv = vpriv; - - return priv->netdev; -} - static struct mlx5_interface mlx5e_interface = { .add = mlx5e_add, .remove = mlx5e_remove, .attach = mlx5e_attach, .detach = mlx5e_detach, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, - .get_dev = mlx5e_get_netdev, }; void mlx5e_init(void) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 584d8a5df7eb..cc29e880c733 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1004,12 +1004,10 @@ struct mlx5_interface { void (*remove)(struct mlx5_core_dev *dev, void *context); int (*attach)(struct mlx5_core_dev *dev, void *context); void (*detach)(struct mlx5_core_dev *dev, void *context); - void * (*get_dev)(void *context); int protocol; struct list_head list; }; -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); -- cgit v1.2.3 From ec1366c20721157cc2d077e84d2a062f93175ce5 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Thu, 25 Oct 2018 21:51:11 +0300 Subject: net/mlx5e: Store eswitch uplink representor state on a dedicated struct Currently only a single field in the representor private structure is relevant for uplink representors. As a pre-step to allow adding additional uplink representor fields, introduce an uplink representor private structure. This is a preparation step towards replacing the egdev logic with the indirect block notification mechanism. This patch doesn't change any functionality.
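The change is mechanical, but the pattern is worth noting: fields that only make sense for one role get their own sub-struct, so later role-specific additions do not bloat the common structure. A toy sketch, not the driver's types:

struct uplink_state {                   /* grows as uplink-only state lands */
        void *tc_table;
};

struct rep_priv {
        int vport;                      /* meaningful for every representor */
        struct uplink_state uplink;     /* valid for the uplink rep only */
};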
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 9 ++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a137540b84bf..25a656b50264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1244,7 +1244,7 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; int err; if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { @@ -1258,7 +1258,7 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_remove_sqs; /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&rpriv->tc_ht); + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); if (err) goto err_neigh_cleanup; @@ -1281,7 +1281,7 @@ mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) mlx5e_remove_sqs_fwd_rules(priv); /* clean uplink offloaded TC rules, delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->tc_ht); + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); mlx5e_rep_neigh_cleanup(rpriv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 844d32d5c29f..8d39c5c59aeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -53,13 +53,20 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_rep_uplink_priv { + /* Filters DB - instantiated by the uplink representor and shared by + * the uplink's VFs + */ + struct rhashtable tc_ht; +}; + struct mlx5e_rep_priv { struct mlx5_eswitch_rep *rep; struct mlx5e_neigh_update_table neigh_update; struct net_device *netdev; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; - struct rhashtable tc_ht; /* valid for uplink rep */ + struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ }; static inline diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a3ff2492646d..42eb6ee37754 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3018,7 +3018,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - return &uplink_rpriv->tc_ht; + return &uplink_rpriv->uplink_priv.tc_ht; } else return &priv->fs.tc.ht; } -- cgit v1.2.3 From f5bc2c5de1015caf7e49b0bf9e7e97077736045a Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 28 Oct 2018 08:34:51 +0200 Subject: net/mlx5e: Support TC indirect block notifications for eswitch uplink reprs Towards using this mechanism as the means to offload tunnel decap rules set on SW tunnel devices instead of egdev, add the supporting structures and functions. 
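The supporting structure is essentially a per-tunnel-device registry: one private node per device whose block events were subscribed to, looked up by device pointer. A minimal sketch of that bookkeeping, using a plain singly linked list instead of the kernel's list_head and with invented names:

#include <stdlib.h>

struct indr_node {
        const void *netdev;             /* key: the higher-level device */
        struct indr_node *next;
};

static struct indr_node *indr_lookup(struct indr_node *head, const void *dev)
{
        for (; head; head = head->next)
                if (head->netdev == dev)
                        return head;
        return NULL;
}

static int indr_register(struct indr_node **head, const void *dev)
{
        struct indr_node *n;

        if (indr_lookup(*head, dev))
                return -1;              /* already registered */
        n = malloc(sizeof(*n));
        if (!n)
                return -1;
        n->netdev = dev;
        n->next = *head;
        *head = n;
        return 0;
}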
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 184 +++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 13 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 3 + 4 files changed, 210 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 25a656b50264..3aab3ce9119d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -49,6 +49,15 @@ static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + + struct list_head list; +}; + +static void mlx5e_rep_indr_unregister_block(struct net_device *netdev); + static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -518,6 +527,166 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) neigh_release(n); } +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + /* All callback list access should be protected by RTNL. */ + ASSERT_RTNL(); + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_rep_indr_block_priv *cb_priv, *temp; + struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; + + list_for_each_entry_safe(cb_priv, temp, head, list) { + mlx5e_rep_indr_unregister_block(cb_priv->netdev); + kfree(cb_priv); + } +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct tc_cls_flower_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv) +{ + return -EOPNOTSUPP; +} + +static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + default: + return -EOPNOTSUPP; + } +} + +static int +mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct tc_block_offload *f) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + int err = 0; + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + err = tcf_block_cb_register(f->block, + mlx5e_rep_indr_setup_block_cb, + netdev, indr_priv, f->extack); + if (err) { + list_del(&indr_priv->list); + kfree(indr_priv); + } + + return err; + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + mlx5e_rep_indr_setup_block_cb, + netdev); + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) { + list_del(&indr_priv->list); + kfree(indr_priv); + } + + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + 
+static +int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + int err; + + err = __tc_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + netdev); + if (err) { + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", + netdev_name(netdev), err); + } + return err; +} + +static void mlx5e_rep_indr_unregister_block(struct net_device *netdev) +{ + __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + netdev); +} + +static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + uplink_priv.netdevice_nb); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (!mlx5e_tc_tun_device_to_offload(priv, netdev)) + return NOTIFY_OK; + + switch (event) { + case NETDEV_REGISTER: + mlx5e_rep_indr_register_block(rpriv, netdev); + break; + case NETDEV_UNREGISTER: + mlx5e_rep_indr_unregister_block(netdev); + break; + } + return NOTIFY_OK; +} + static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); @@ -1262,8 +1431,19 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (err) goto err_neigh_cleanup; + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier(&uplink_priv->netdevice_nb); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); + goto err_indirect_block_cleanup; + } + return 0; +err_indirect_block_cleanup: + mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); err_remove_sqs: @@ -1280,6 +1460,10 @@ mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_remove_sqs_fwd_rules(priv); + /* clean indirect TC block notifications */ + unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); + mlx5e_rep_indr_clean_block_privs(rpriv); + /* clean uplink offloaded TC rules, delete shared tc flow table */ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 8d39c5c59aeb..c078c6703dc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -58,6 +58,19 @@ struct mlx5_rep_uplink_priv { * the uplink's VFs */ struct rhashtable tc_ht; + + /* indirect block callbacks are invoked on bind/unbind events + * on registered higher level devices (e.g. 
tunnel devices) + * + * tc_indr_block_cb_priv_list is used to lookup indirect callback + * private data + * + * netdevice_nb is the netdev events notifier - used to register + * tunnel devices for block events + * + */ + struct list_head tc_indr_block_priv_list; + struct notifier_block netdevice_nb; }; struct mlx5e_rep_priv { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 42eb6ee37754..cb5bd3eaf738 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2704,6 +2704,16 @@ out: return err; } +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + if (netif_is_vxlan(netdev) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + return true; + + return false; +} + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 49436bf3b80a..378507988a32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -70,6 +70,9 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); int mlx5e_tc_num_filters(struct mlx5e_priv *priv); +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} -- cgit v1.2.3 From 71d82d2a90327c9c816e65471b7859b601993f89 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 28 Oct 2018 09:14:50 +0200 Subject: net/mlx5e: Provide the TC filter netdev as parameter to flower callbacks Currently the driver controls flower filters that are installed on its devices. However, with the introduction of the indirect block notifications platform the driver may receive control events for filters that are installed on higher level net devices (e.g. tunnel devices). Therefore, the driver filter control API will not be able to implicitly assume the filter's net device. 
Explicitly specify the filter's net device, no functional change Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 ++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 9 ++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 6 +++--- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 06b1c0172a7b..88116a4750b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3386,11 +3386,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3aab3ce9119d..ab9c7ec77520 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1007,11 +1007,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cb5bd3eaf738..b614513df127 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3178,7 +3178,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; @@ -3224,7 +3224,7 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) return false; } -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { struct rhashtable *tc_ht = get_tc_ht(priv); @@ -3243,7 +3243,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, return 0; } -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { struct rhashtable *tc_ht = get_tc_ht(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 378507988a32..004e679a4f53 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -51,12 +51,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); struct mlx5e_encap_entry; -- cgit v1.2.3 From d11afc2600377c6775abcbc1fd410f199c5bf458 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 28 Oct 2018 10:46:34 +0200 Subject: net/mlx5e: Propagate the filter's net device to mlx5e structures Propagate the filter's net_device parameter to the tc flower parsed attributes structure so that it can later be used in tunnel decap offloading sequences. Pre-step for replacing egdev logic with the indirect block notification mechanism. Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 26 ++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b614513df127..3271eda8098b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -96,6 +96,7 @@ struct mlx5e_tc_flow { struct mlx5e_tc_flow_parse_attr { struct ip_tunnel_info tun_info; + struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; void *mod_hdr_actions; @@ -3054,10 +3055,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->flags = flow_flags; flow->priv = priv; - err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err) - goto err_free; - *__flow = flow; *__parse_attr = parse_attr; @@ -3073,6 +3070,7 @@ static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; @@ -3086,6 +3084,11 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, &parse_attr, &flow); if (err) goto out; + parse_attr->filter_dev = filter_dev; + flow->esw_attr->parse_attr = parse_attr; + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f); + if (err) + goto err_free; flow->esw_attr->chain = f->common.chain_index; flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; @@ -3116,6 +3119,7 @@ static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; @@ -3134,6 +3138,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto out; + parse_attr->filter_dev = filter_dev; + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f); + if (err) + goto err_free; + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); if (err) goto err_free; @@ -3159,6 +3168,7 @@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags, + struct net_device *filter_dev, 
struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3171,9 +3181,11 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (esw && esw->mode == SRIOV_OFFLOADS) - err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + err = mlx5e_add_fdb_flow(priv, f, flow_flags, + filter_dev, flow); else - err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + err = mlx5e_add_nic_flow(priv, f, flow_flags, + filter_dev, flow); return err; } @@ -3196,7 +3208,7 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, goto out; } - err = mlx5e_tc_add_flow(priv, f, flags, &flow); + err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); if (err) goto out; -- cgit v1.2.3 From ef381359e3a81422cee85ddee1f7dc3ac90f3c38 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 28 Oct 2018 13:03:54 +0200 Subject: net/mlx5e: Replace egdev with indirect block notifications Use TC indirect block notifications to offload filters that are configured on higher level device interfaces (e.g. tunnel devices). This mechanism replaces the current egdev implementation. Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 47 +++++++++++------------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++ 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index ab9c7ec77520..5cac4de435c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -561,7 +561,27 @@ mlx5e_rep_indr_offload(struct net_device *netdev, struct tc_cls_flower_offload *flower, struct mlx5e_rep_indr_block_priv *indr_priv) { - return -EOPNOTSUPP; + int err = 0; + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + + switch (flower->command) { + case TC_CLSFLOWER_REPLACE: + err = mlx5e_configure_flower(netdev, priv, + flower, MLX5E_TC_EGRESS); + break; + case TC_CLSFLOWER_DESTROY: + err = mlx5e_delete_flower(netdev, priv, + flower, MLX5E_TC_EGRESS); + break; + case TC_CLSFLOWER_STATS: + err = mlx5e_stats_flower(netdev, priv, + flower, MLX5E_TC_EGRESS); + break; + default: + err = -EOPNOTSUPP; + } + + return err; } static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, @@ -1020,19 +1040,6 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, } } -static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - struct mlx5e_priv *priv = cb_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); - default: - return -EOPNOTSUPP; - } -} - static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { @@ -1516,24 +1523,16 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH); upriv = netdev_priv(uplink_rpriv->netdev); - err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - if (err) - goto err_neigh_cleanup; err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_egdev_cleanup; + goto err_neigh_cleanup; } return 0; -err_egdev_cleanup: - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1560,8 +1559,6 @@ 
mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); upriv = netdev_priv(uplink_rpriv->netdev); - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3271eda8098b..6ca94a2f167f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2906,6 +2906,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct net_device *out_dev; out_dev = tcf_mirred_dev(a); + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return -EINVAL; + } if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, @@ -2932,6 +2939,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; /* attr->out_rep is resolved when we handle encap */ + } else if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return -EINVAL; + } else { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); -- cgit v1.2.3 From ea7162ac3a33a44a7bb0b5793057015d25617e68 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 14 Nov 2018 15:41:50 +0200 Subject: net/mlx5e: Refactor VXLAN tunnel encap offloading code Separate the vxlan header encap logic from the general ipv4/6 encapsulation methods, thus allowing the common IP encap/decap code to branch, in a downstream patch, to multiple IP tunnels. Code refactoring with no functional change.
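The refactoring settles on a fixed layering contract: the common IPv4/IPv6 code builds the Ethernet and IP headers, then hands the remainder of the buffer to a tunnel-specific helper that appends its own headers and reports the IP protocol number back. Below is a stand-alone sketch of that contract for VXLAN; it is plain userspace C with local stand-in structs (udp_hdr, vxlan_hdr), not the driver code itself.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

struct udp_hdr {		/* stand-in for the kernel's struct udphdr */
	uint16_t source;
	uint16_t dest;
	uint16_t len;
	uint16_t check;
};

struct vxlan_hdr {		/* stand-in for the kernel's struct vxlanhdr */
	uint32_t vx_flags;
	uint32_t vx_vni;
};

#define VXLAN_HF_VNI	0x08000000U	/* "VNI present" flag, host order */

/* Tunnel-specific part: fill in UDP + VXLAN and report the IP protocol
 * number so the common IP code can set iphdr->protocol / ipv6hdr->nexthdr.
 */
static int gen_vxlan_header(char *buf, uint16_t udp_dport, uint32_t vni,
			    uint8_t *ip_proto)
{
	struct udp_hdr *udp = (struct udp_hdr *)buf;
	struct vxlan_hdr *vxh = (struct vxlan_hdr *)(buf + sizeof(*udp));

	*ip_proto = 17;			/* IPPROTO_UDP */
	udp->dest = htons(udp_dport);
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = htonl(vni << 8);	/* 24-bit VNI sits in the upper bits */
	return 0;
}

int main(void)
{
	char buf[16] = {0};
	uint8_t proto;

	gen_vxlan_header(buf, 4789, 42, &proto);
	printf("ip_proto=%u dport=%u\n", proto,
	       ntohs(((struct udp_hdr *)buf)->dest));
	return 0;
}

With this split, adding another UDP-based tunnel only requires a new header generator; the route lookup, neighbour handling, and IP header construction stay shared.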
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 189 +++++++++--------------- 1 file changed, 70 insertions(+), 119 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6ca94a2f167f..ed04ab8287e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2416,70 +2416,25 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static void gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IP); - - ip->daddr = daddr; - ip->saddr = saddr; - - ip->tos = tos; - ip->ttl = ttl; - ip->protocol = IPPROTO_UDP; - ip->version = 0x4; - ip->ihl = 0x5; +static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh = (struct vxlanhdr *) + ((char *)udp + sizeof(struct udphdr)); - udp->dest = udp_dst_port; + udp->dest = tun_key->tp_dst; vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; } -static void gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct ip_tunnel_key *tun_key) { - struct ethhdr *eth = (struct ethhdr *)buf; - struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IPV6); - - ip6_flow_hdr(ip6h, tos, 0); - /* the HW fills up ipv6 payload len */ - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = ttl; - ip6h->daddr = *daddr; - ip6h->saddr = *saddr; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); + *ip_proto = IPPROTO_UDP; + return mlx5e_gen_vxlan_header(buf, tun_key); } static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, @@ -2487,13 +2442,17 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; + int ipv4_encap_size = ETH_HLEN + + sizeof(struct iphdr) + + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; struct net_device *out_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; - u8 
nud_state, tos, ttl; char *encap_header; + struct ethhdr *eth; + u8 nud_state, ttl; + struct iphdr *ip; int err; if (max_encap_size < ipv4_encap_size) { @@ -2506,22 +2465,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, if (!encap_header) return -ENOMEM; - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - + /* add the IP fields */ fl4.flowi4_tos = tun_key->tos; fl4.daddr = tun_key->u.ipv4.dst; fl4.saddr = tun_key->u.ipv4.src; + ttl = tun_key->ttl; err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); @@ -2536,7 +2484,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; - /* It's importent to add the neigh to the hash table before checking + /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the * neigh changes it's validity state, we would find the relevant neigh * in the hash. @@ -2550,18 +2498,27 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, ether_addr_copy(e->h_dest, n->ha); read_unlock_bh(&n->lock); - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv4(out_dev, encap_header, - ipv4_encap_size, e->h_dest, tos, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; + /* add ethernet header */ + eth = (struct ethhdr *)encap_header; + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IP); + + /* add ip header */ + ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = ttl; + ip->daddr = fl4.daddr; + ip->saddr = fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, tun_key); + if (err) goto destroy_neigh_entry; - } + e->encap_size = ipv4_encap_size; e->encap_header = encap_header; @@ -2598,13 +2555,17 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; + int ipv6_encap_size = ETH_HLEN + + sizeof(struct ipv6hdr) + + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; struct net_device *out_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; - u8 nud_state, tos, ttl; + struct ipv6hdr *ip6h; char *encap_header; + struct ethhdr *eth; + u8 nud_state, ttl; int err; if (max_encap_size < ipv6_encap_size) { @@ -2617,17 +2578,6 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, if (!encap_header) return -ENOMEM; - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl6.flowi6_proto = IPPROTO_UDP; - fl6.fl6_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; ttl = tun_key->ttl; fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); @@ -2661,18 +2611,25 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, ether_addr_copy(e->h_dest, n->ha); read_unlock_bh(&n->lock); - switch 
(e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv6(out_dev, encap_header, - ipv6_encap_size, e->h_dest, tos, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; + /* add ethernet header */ + eth = (struct ethhdr *)encap_header; + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + + /* add ip header */ + ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = ttl; + ip6h->daddr = fl6.daddr; + ip6h->saddr = fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, tun_key); + if (err) goto destroy_neigh_entry; - } e->encap_size = ipv6_encap_size; e->encap_header = encap_header; @@ -2731,17 +2688,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, uintptr_t hash_key; bool found = false; - /* udp dst port must be set */ - if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) - goto vxlan_encap_offload_err; - - /* setting udp src port isn't supported */ - if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { -vxlan_encap_offload_err: + if (!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { NL_SET_ERR_MSG_MOD(extack, - "must set udp dst port and not set udp src port"); + "vxlan HW offloading is not supported"); netdev_warn(priv->netdev, - "must set udp dst port and not set udp src port\n"); + "vxlan HW offloading is not supported\n"); return -EOPNOTSUPP; } -- cgit v1.2.3 From 4d70564d1c9e812b83f30c8dc23cd506abfcfc08 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 14 Nov 2018 15:41:50 +0200 Subject: net/mlx5e: Refactor VXLAN tunnel decap offloading code Separate the vxlan header match handling from the matching on the general fields of ipv4/6 tunnels, thus allowing the common IP tunnel match code to branch, in a downstream patch, to multiple IP tunnels. This patch does not change any functionality.
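The decap path gets the mirror-image treatment: all VXLAN-specific match validation moves into one helper, which insists that the filter matches the full UDP destination port and that the port is actually registered as a VXLAN port before any match fields are set. A minimal stand-alone sketch of those two checks, with plain C types standing in for the flow-dissector structures and a hypothetical vxlan_port_registered() in place of mlx5_vxlan_lookup_port():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ports_match {
	uint16_t dst_key;	/* matched UDP destination port (host order) */
	uint16_t dst_mask;	/* mask applied to the port match */
};

static bool vxlan_port_registered(uint16_t port)
{
	return port == 4789;	/* stand-in for a real port registry lookup */
}

/* Returns 0 when the decap filter is offloadable, -1 otherwise. */
static int check_vxlan_decap(const struct ports_match *m, bool has_enc_ports)
{
	/* the full udp dst port must be given ... */
	if (!has_enc_ports || m->dst_mask != 0xffff) {
		fprintf(stderr, "filter must match the full enc_dst_port\n");
		return -1;
	}
	/* ... and it must be a port known to be VXLAN */
	if (!vxlan_port_registered(m->dst_key)) {
		fprintf(stderr, "port %u is not a VXLAN port\n", m->dst_key);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct ports_match full = { .dst_key = 4789, .dst_mask = 0xffff };
	struct ports_match partial = { .dst_key = 4789, .dst_mask = 0xff00 };

	printf("full mask: %d, partial mask: %d\n",
	       check_vxlan_decap(&full, true),
	       check_vxlan_decap(&partial, true));
	return 0;
}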
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 90 ++++++++++++++----------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ed04ab8287e9..74823cab1d16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1193,21 +1193,59 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5e_tc_del_nic_flow(priv, flow); } -static void parse_vxlan_attr(struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) +static int parse_tunnel_vxlan_attr(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + struct netlink_ext_ack *extack = f->common.extack; + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->mask); + void *misc_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + void *misc_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, misc_parameters); + /* Full udp dst port must be given */ + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "VXLAN decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "VXLAN decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + /* udp dst port must be knonwn as a VXLAN port */ + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(key->dst)); + return -EOPNOTSUPP; + } + + /* dst UDP port is valid here */ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); + + /* match on VNI */ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_dissector_key_keyid *key = skb_flow_dissector_target(f->dissector, @@ -1222,6 +1260,7 @@ static void parse_vxlan_attr(struct mlx5_flow_spec *spec, MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, be32_to_cpu(key->keyid)); } + return 0; } static int parse_tunnel_attr(struct mlx5e_priv *priv, @@ -1240,39 +1279,12 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, f->key); if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct 
flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); + int err = 0; - /* Full udp dst port must be given */ - if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) + err = parse_tunnel_vxlan_attr(priv, spec, f, + headers_c, headers_v); + if (err) goto vxlan_match_offload_err; - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - parse_vxlan_attr(spec, f); - else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); - return -EOPNOTSUPP; - } - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); } else { /* udp dst port must be given */ vxlan_match_offload_err: NL_SET_ERR_MSG_MOD(extack, -- cgit v1.2.3 From 54c177ca9c6efe5df516eefb886761b89a82eaf0 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 14 Nov 2018 15:21:27 +0200 Subject: net/mlx5e: Branch according to classified tunnel type Currently the tunnel offloading encap/decap methods assume that VXLAN is the sole tunneling protocol. Lay the infrastructure for supporting multiple tunneling protocols by branching according to the tunnel net device kind. An encap filter's tunnel type is determined according to the egress/mirred net device, while a decap filter's tunnel type is classified according to the filter's ingress net device kind. Distinguish between the tunnel type as defined by the SW model and the FW reformat type that specifies the HW operation to be performed.
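In practice the classification reduces to a dispatch on the net device's rtnl link kind string, which is what netif_is_vxlan() inspects underneath. A minimal sketch of that dispatch, using a bare kind string as a stand-in for dev->rtnl_link_ops->kind:

#include <stdio.h>
#include <string.h>

enum tunnel_type {
	TC_TUNNEL_TYPE_UNKNOWN,
	TC_TUNNEL_TYPE_VXLAN,
};

/* Map a link kind to a SW tunnel type; unknown kinds fall through so
 * the caller can reject the offload with -EOPNOTSUPP.
 */
static enum tunnel_type get_tunnel_type(const char *kind)
{
	if (kind && !strcmp(kind, "vxlan"))
		return TC_TUNNEL_TYPE_VXLAN;
	return TC_TUNNEL_TYPE_UNKNOWN;
}

int main(void)
{
	printf("vxlan -> %d, gretap -> %d\n",
	       get_tunnel_type("vxlan"), get_tunnel_type("gretap"));
	return 0;
}

Keeping the SW tunnel type separate from the FW reformat type means the same classification can later select different HW reformat operations per tunnel protocol.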
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 160 ++++++++++++++++------- 2 files changed, 114 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index c078c6703dc7..2c8798332c32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -149,6 +149,8 @@ struct mlx5e_encap_entry { struct net_device *out_dev; int tunnel_type; + int tunnel_hlen; + int reformat_type; u8 flags; char *encap_header; int encap_size; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 74823cab1d16..7d7f490d8ff1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -79,6 +79,13 @@ enum { #define MLX5E_TC_MAX_SPLITS 1 +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN +}; + +static int mlx5e_get_tunnel_type(struct net_device *tunnel_dev); + struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; @@ -677,6 +684,14 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, } } +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return ""; +} + static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -1037,7 +1052,8 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int err; - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, e->encap_size, e->encap_header, MLX5_FLOW_NAMESPACE_FDB, &e->encap_id); @@ -1265,7 +1281,8 @@ static int parse_tunnel_vxlan_attr(struct mlx5e_priv *priv, static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1277,20 +1294,28 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, f->key); + int tunnel_type; + int err = 0; - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - int err = 0; - + tunnel_type = mlx5e_get_tunnel_type(filter_dev); + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { err = parse_tunnel_vxlan_attr(priv, spec, f, headers_c, headers_v); - if (err) - goto vxlan_match_offload_err; - } else { /* udp dst port must be given */ -vxlan_match_offload_err: + } else { NL_SET_ERR_MSG_MOD(extack, - "IP tunnel decap offload supported only for vxlan, must set UDP dport"); + "decapsulation offload is not supported"); netdev_warn(priv->netdev, - "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); + "decapsulation offload is not supported for %s net device (%d)\n", + mlx5e_netdev_kind(filter_dev), tunnel_type); + return -EOPNOTSUPP; + } + + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "failed to parse %s tunnel attributes (%d)\n", + mlx5e_netdev_kind(filter_dev), tunnel_type); return -EOPNOTSUPP; } @@ -1395,6 +1420,7 @@ vxlan_match_offload_err: static int 
__parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; @@ -1446,7 +1472,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, switch (key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f)) + if (parse_tunnel_attr(priv, spec, f, filter_dev)) return -EOPNOTSUPP; break; default: @@ -1788,7 +1814,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; @@ -1798,7 +1825,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, u8 match_level; int err; - err = __parse_cls_flower(priv, spec, f, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level); if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; @@ -2443,10 +2470,21 @@ static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) } static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, - struct ip_tunnel_key *tun_key) + struct mlx5e_encap_entry *e) { - *ip_proto = IPPROTO_UDP; - return mlx5e_gen_vxlan_header(buf, tun_key); + int err = 0; + struct ip_tunnel_key *key = &e->tun_info.key; + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *ip_proto = IPPROTO_UDP; + err = mlx5e_gen_vxlan_header(buf, key); + } else { + pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" + , e->tunnel_type); + err = -EOPNOTSUPP; + } + + return err; } static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, @@ -2456,7 +2494,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + - VXLAN_HLEN; + e->tunnel_hlen; struct ip_tunnel_key *tun_key = &e->tun_info.key; struct net_device *out_dev; struct neighbour *n = NULL; @@ -2527,7 +2565,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, /* add tunneling protocol header */ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), - &ip->protocol, tun_key); + &ip->protocol, e); if (err) goto destroy_neigh_entry; @@ -2540,7 +2578,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, goto out; } - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, ipv4_encap_size, encap_header, MLX5_FLOW_NAMESPACE_FDB, &e->encap_id); @@ -2569,7 +2608,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + - VXLAN_HLEN; + e->tunnel_hlen; struct ip_tunnel_key *tun_key = &e->tun_info.key; struct net_device *out_dev; struct neighbour *n = NULL; @@ -2639,7 +2678,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, /* add tunneling protocol header */ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), - &ip6h->nexthdr, tun_key); + &ip6h->nexthdr, e); if (err) goto destroy_neigh_entry; @@ -2652,7 +2691,8 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv 
*priv, goto out; } - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, ipv6_encap_size, encap_header, MLX5_FLOW_NAMESPACE_FDB, &e->encap_id); @@ -2674,14 +2714,52 @@ out: return err; } +static int mlx5e_get_tunnel_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_VXLAN; + else + return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; +} + bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) { - if (netif_is_vxlan(netdev) && + int tunnel_type = mlx5e_get_tunnel_type(netdev); + + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) return true; + else + return false; +} - return false; +static int mlx5e_init_tunnel_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel_type = mlx5e_get_tunnel_type(tunnel_dev); + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + e->tunnel_hlen = VXLAN_HLEN; + } else { + e->reformat_type = -1; + e->tunnel_hlen = -1; + return -EOPNOTSUPP; + } + return 0; } static int mlx5e_attach_encap(struct mlx5e_priv *priv, @@ -2696,28 +2774,9 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct ip_tunnel_key *key = &tun_info->key; struct mlx5e_encap_entry *e; - int tunnel_type, err = 0; uintptr_t hash_key; bool found = false; - - if (!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan HW offloading is not supported"); - netdev_warn(priv->netdev, - "vxlan HW offloading is not supported\n"); - return -EOPNOTSUPP; - } - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - } else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); - return -EOPNOTSUPP; - } + int err = 0; hash_key = hash_encap_info(key); @@ -2738,7 +2797,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, return -ENOMEM; e->tun_info = *tun_info; - e->tunnel_type = tunnel_type; + err = mlx5e_init_tunnel_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err; + INIT_LIST_HEAD(&e->flows); if (family == AF_INET) @@ -3063,7 +3125,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto out; parse_attr->filter_dev = filter_dev; flow->esw_attr->parse_attr = parse_attr; - err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f); + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); if (err) goto err_free; @@ -3116,7 +3179,8 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, goto out; parse_attr->filter_dev = filter_dev; - err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f); + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); if (err) goto err_free; -- cgit v1.2.3 From 101f4de9dd521c6d06dfdacaa35e506a8db8494b Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 2 Dec 2018 
14:43:27 +0200 Subject: net/mlx5e: Move TC tunnel offloading code to separate source file Move tunnel offloading related code to a separate source file for better code maintainability. Code refactoring with no functional change. Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 496 ++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 43 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 500 +-------------------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 2 - 6 files changed, 548 insertions(+), 496 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d499b3d00348..40764d87413a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c new file mode 100644 index 000000000000..eaa43477e0ea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -0,0 +1,496 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include <net/vxlan.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *uplink_rpriv; + struct rtable *rt; + struct neighbour *n = NULL; + +#if IS_ENABLED(CONFIG_INET) + int ret; + + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; +#else + return -EOPNOTSUPP; +#endif + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) + *out_dev = uplink_rpriv->netdev; + else + *out_dev = rt->dst.dev; + + if (!(*out_ttl)) + *out_ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return ""; +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + + ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, + fl6); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip6_dst_hoplimit(dst); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) + *out_dev = uplink_rpriv->netdev; + else + *out_dev = dst->dev; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh = (struct vxlanhdr *) + ((char *)udp + sizeof(struct udphdr)); + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + int err = 0; + struct ip_tunnel_key *key = &e->tun_info.key; + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *ip_proto = IPPROTO_UDP; + err = mlx5e_gen_vxlan_header(buf, key); + } else { + pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" + , e->tunnel_type); + err = -EOPNOTSUPP; + } + + return err; +} + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv4_encap_size = ETH_HLEN + + sizeof(struct iphdr) + + e->tunnel_hlen; + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; + struct neighbour *n = NULL; + struct flowi4 fl4 = {}; + char *encap_header; + struct ethhdr *eth; + u8 nud_state, ttl; + struct iphdr *ip; + int err; + 
+ if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + /* add the IP fields */ + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; + ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, + &fl4, &n, &ttl); + if (err) + goto free_encap; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + eth = (struct ethhdr *)encap_header; + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IP); + + /* add ip header */ + ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = ttl; + ip->daddr = fl4.daddr; + ip->saddr = fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + err = -EAGAIN; + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv6_encap_size = ETH_HLEN + + sizeof(struct ipv6hdr) + + e->tunnel_hlen; + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + struct ipv6hdr *ip6h; + char *encap_header; + struct ethhdr *eth; + u8 nud_state, ttl; + int err; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + ttl = tun_key->ttl; + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, 
+ &fl6, &n, &ttl); + if (err) + goto free_encap; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + eth = (struct ethhdr *)encap_header; + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + + /* add ip header */ + ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = ttl; + ip6h->daddr = fl6.daddr; + ip6h->saddr = fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + err = -EAGAIN; + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_VXLAN; + else + return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; +} + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + int tunnel_type = mlx5e_tc_tun_get_type(netdev); + + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + return true; + else + return false; +} + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + e->tunnel_hlen = VXLAN_HLEN; + } else { + e->reformat_type = -1; + e->tunnel_hlen = -1; + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct 
flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->mask); + void *misc_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters); + + /* Full udp dst port must be given */ + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "VXLAN decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "VXLAN decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + /* udp dst port must be knonwn as a VXLAN port */ + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(key->dst)); + return -EOPNOTSUPP; + } + + /* dst UDP port is valid here */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); + + /* match on VNI */ + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(key->keyid)); + } + return 0; +} + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int tunnel_type; + int err = 0; + + tunnel_type = mlx5e_tc_tun_get_type(filter_dev); + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, + headers_c, headers_v); + } else { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device (%d)\n", + mlx5e_netdev_kind(filter_dev), tunnel_type); + return -EOPNOTSUPP; + } + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h new file mode 100644 index 000000000000..ad4fc93b17a1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_TC_TUNNEL_H__ +#define __MLX5_EN_TC_TUNNEL_H__ + +#include <linux/netdevice.h> +#include <linux/mlx5/fs.h> +#include <net/pkt_cls.h> +#include <linux/netlink.h> +#include "en.h" +#include "en_rep.h" + +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN +}; + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); + +#endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5cac4de435c9..85e51bd4147f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -42,6 +42,7 @@ #include "en.h" #include "en_rep.h" #include "en_tc.h" +#include "en/tc_tun.h" #include "fs_core.h" #define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7d7f490d8ff1..abc200947e84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -44,15 +44,14 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/vxlan.h> #include <net/arp.h> #include "en.h" #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "lib/vxlan.h" #include "fs_core.h" #include "en/port.h" +#include "en/tc_tun.h" struct mlx5_nic_flow_attr { u32 action; @@ -79,13 +78,6 @@ enum { #define MLX5E_TC_MAX_SPLITS 1 -enum { - MLX5E_TC_TUNNEL_TYPE_UNKNOWN, - MLX5E_TC_TUNNEL_TYPE_VXLAN -}; - -static int mlx5e_get_tunnel_type(struct net_device *tunnel_dev); - struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; @@ -684,14 +676,6 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, } } -static const char *mlx5e_netdev_kind(struct net_device *dev) -{ - if (dev->rtnl_link_ops) - return dev->rtnl_link_ops->kind; - else - return ""; -} - static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -1209,75 +1193,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5e_tc_del_nic_flow(priv, flow); } -static int parse_tunnel_vxlan_attr(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v) -{ - struct netlink_ext_ack *extack = f->common.extack; - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); - void *misc_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - misc_parameters); - - /* Full udp dst port must be given */ - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) { - NL_SET_ERR_MSG_MOD(extack, -
"VXLAN decap filter must include enc_dst_port condition"); - netdev_warn(priv->netdev, - "VXLAN decap filter must include enc_dst_port condition\n"); - return -EOPNOTSUPP; - } - - /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) { - NL_SET_ERR_MSG_MOD(extack, - "Matched UDP port is not registered as a VXLAN port"); - netdev_warn(priv->netdev, - "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(key->dst)); - return -EOPNOTSUPP; - } - - /* dst UDP port is valid here */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); - - /* match on VNI */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(key->keyid)); - } - return 0; -} static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -1294,29 +1209,14 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, f->key); - int tunnel_type; int err = 0; - tunnel_type = mlx5e_get_tunnel_type(filter_dev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - err = parse_tunnel_vxlan_attr(priv, spec, f, - headers_c, headers_v); - } else { - NL_SET_ERR_MSG_MOD(extack, - "decapsulation offload is not supported"); - netdev_warn(priv->netdev, - "decapsulation offload is not supported for %s net device (%d)\n", - mlx5e_netdev_kind(filter_dev), tunnel_type); - return -EOPNOTSUPP; - } - + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + headers_c, headers_v); if (err) { NL_SET_ERR_MSG_MOD(extack, "failed to parse tunnel attributes"); - netdev_warn(priv->netdev, - "failed to parse %s tunnel attributes (%d)\n", - mlx5e_netdev_kind(filter_dev), tunnel_type); - return -EOPNOTSUPP; + return err; } if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { @@ -2359,45 +2259,6 @@ static inline int hash_encap_info(struct ip_tunnel_key *key) return jhash(key, sizeof(*key), 0); } -static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; - struct rtable *rt; - struct neighbour *n = NULL; - -#if IS_ENABLED(CONFIG_INET) - int ret; - - rt = ip_route_output_key(dev_net(mirred_dev), fl4); - ret = PTR_ERR_OR_ZERO(rt); - if (ret) - return ret; -#else - return -EOPNOTSUPP; -#endif - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = rt->dst.dev; - - if (!(*out_ttl)) 
- *out_ttl = ip4_dst_hoplimit(&rt->dst); - n = dst_neigh_lookup(&rt->dst, &fl4->daddr); - ip_rt_put(rt); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) @@ -2413,354 +2274,7 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS)); } -static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct neighbour *n = NULL; - struct dst_entry *dst; - -#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5e_rep_priv *uplink_rpriv; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - int ret; - - ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, - fl6); - if (ret < 0) - return ret; - - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = dst->dev; -#else - return -EOPNOTSUPP; -#endif - - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} - -static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - struct udphdr *udp = (struct udphdr *)(buf); - struct vxlanhdr *vxh = (struct vxlanhdr *) - ((char *)udp + sizeof(struct udphdr)); - - udp->dest = tun_key->tp_dst; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(tun_id); - - return 0; -} - -static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, - struct mlx5e_encap_entry *e) -{ - int err = 0; - struct ip_tunnel_key *key = &e->tun_info.key; - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *ip_proto = IPPROTO_UDP; - err = mlx5e_gen_vxlan_header(buf, key); - } else { - pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" - , e->tunnel_type); - err = -EOPNOTSUPP; - } - - return err; -} - -static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv4_encap_size = ETH_HLEN + - sizeof(struct iphdr) + - e->tunnel_hlen; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi4 fl4 = {}; - char *encap_header; - struct ethhdr *eth; - u8 nud_state, ttl; - struct iphdr *ip; - int err; - - if (max_encap_size < ipv4_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - /* add the IP fields */ - fl4.flowi4_tos = tun_key->tos; - fl4.daddr = tun_key->u.ipv4.dst; - fl4.saddr = tun_key->u.ipv4.src; - ttl = tun_key->ttl; - - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, - &fl4, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); 
- e->out_dev = out_dev; - - /* It's important to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. - */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - /* add ethernet header */ - eth = (struct ethhdr *)encap_header; - ether_addr_copy(eth->h_dest, e->h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IP); - - /* add ip header */ - ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); - ip->tos = tun_key->tos; - ip->version = 0x4; - ip->ihl = 0x5; - ip->ttl = ttl; - ip->daddr = fl4.daddr; - ip->saddr = fl4.saddr; - - /* add tunneling protocol header */ - err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), - &ip->protocol, e); - if (err) - goto destroy_neigh_entry; - - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; -} - -static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv6_encap_size = ETH_HLEN + - sizeof(struct ipv6hdr) + - e->tunnel_hlen; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi6 fl6 = {}; - struct ipv6hdr *ip6h; - char *encap_header; - struct ethhdr *eth; - u8 nud_state, ttl; - int err; - - if (max_encap_size < ipv6_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - ttl = tun_key->ttl; - - fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); - fl6.daddr = tun_key->u.ipv6.dst; - fl6.saddr = tun_key->u.ipv6.src; - - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, - &fl6, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. 
- */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - /* add ethernet header */ - eth = (struct ethhdr *)encap_header; - ether_addr_copy(eth->h_dest, e->h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IPV6); - - /* add ip header */ - ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); - ip6_flow_hdr(ip6h, tun_key->tos, 0); - /* the HW fills up ipv6 payload len */ - ip6h->hop_limit = ttl; - ip6h->daddr = fl6.daddr; - ip6h->saddr = fl6.saddr; - - /* add tunneling protocol header */ - err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), - &ip6h->nexthdr, e); - if (err) - goto destroy_neigh_entry; - - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; -} - -static int mlx5e_get_tunnel_type(struct net_device *tunnel_dev) -{ - if (netif_is_vxlan(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_VXLAN; - else - return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; -} - -bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, - struct net_device *netdev) -{ - int tunnel_type = mlx5e_get_tunnel_type(netdev); - - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - return true; - else - return false; -} -static int mlx5e_init_tunnel_attr(struct net_device *tunnel_dev, - struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - struct netlink_ext_ack *extack) -{ - e->tunnel_type = mlx5e_get_tunnel_type(tunnel_dev); - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); - - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan udp dport was not registered with the HW"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", - dst_port); - return -EOPNOTSUPP; - } - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - e->tunnel_hlen = VXLAN_HLEN; - } else { - e->reformat_type = -1; - e->tunnel_hlen = -1; - return -EOPNOTSUPP; - } - return 0; -} static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, @@ -2797,16 +2311,16 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, return -ENOMEM; e->tun_info = *tun_info; - err = mlx5e_init_tunnel_attr(mirred_dev, priv, e, extack); + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err; INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); if (err && err != -EAGAIN) goto out_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 004e679a4f53..a15c08a35054 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -70,8 +70,6 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); int mlx5e_tc_num_filters(struct mlx5e_priv *priv); -bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, - struct net_device *netdev); #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } -- cgit v1.2.3 From 0621e6fc5ed2b6e58a2ba6904074e366f290b1d8 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 21 Nov 2018 12:15:34 +0200 Subject: net: Add netif_is_gretap()/netif_is_ip6gretap() Changed the is_gretap_dev and is_ip6gretap_dev logic from structure comparison to string comparison of the rtnl_link_ops kind field. This approach aligns with the current identification methods and function names of vxlan and geneve network devices. Convert mlxsw to use these helpers and use them in downstream mlx5 patch. Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Ido Schimmel Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 4 ++-- include/net/gre.h | 13 +++++++++++-- net/ipv4/ip_gre.c | 6 ------ net/ipv6/ip6_gre.c | 6 ------ 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index d965fd275c90..ad5a9b9e1466 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -383,7 +383,7 @@ mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry) } static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = { - .can_handle = is_gretap_dev, + .can_handle = netif_is_gretap, .parms = mlxsw_sp_span_entry_gretap4_parms, .configure = mlxsw_sp_span_entry_gretap4_configure, .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure, @@ -484,7 +484,7 @@ mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = { - .can_handle = is_ip6gretap_dev, + .can_handle = netif_is_ip6gretap, .parms = mlxsw_sp_span_entry_gretap6_parms, .configure = mlxsw_sp_span_entry_gretap6_configure, .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure, diff --git a/include/net/gre.h b/include/net/gre.h index 797142eee9cd..b60f212c16c6 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,8 +37,17 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs); -bool is_gretap_dev(const struct net_device *dev); -bool is_ip6gretap_dev(const struct net_device *dev); +static inline bool netif_is_gretap(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "gretap"); +} + +static inline bool netif_is_ip6gretap(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "ip6gretap"); +} static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 76a9a5f7a40e..c7a7bd58a23c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1341,12 +1341,6 @@ static void ipgre_tap_setup(struct net_device *dev) ip_tunnel_setup(dev, gre_tap_net_id); } -bool is_gretap_dev(const struct 
net_device *dev) -{ - return dev->netdev_ops == &gre_tap_netdev_ops; -} -EXPORT_SYMBOL_GPL(is_gretap_dev); - static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 81b69bcee714..229e55c99021 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1885,12 +1885,6 @@ static void ip6gre_tap_setup(struct net_device *dev) netif_keep_dst(dev); } -bool is_ip6gretap_dev(const struct net_device *dev) -{ - return dev->netdev_ops == &ip6gre_tap_netdev_ops; -} -EXPORT_SYMBOL_GPL(is_ip6gretap_dev); - static bool ip6gre_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *ipencap) { -- cgit v1.2.3 From df2ef3bff193229973830fd3fd8acf29fa92715e Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Mon, 29 Oct 2018 08:54:42 +0200 Subject: net/mlx5e: Add GRE protocol offloading Add HW offloading support for TC flower filters configured on gretap/ip6gretap net devices. Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 87 ++++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 3 +- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index eaa43477e0ea..c1515f013501 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -2,6 +2,7 @@ /* Copyright (c) 2018 Mellanox Technologies. */ #include +#include #include "lib/vxlan.h" #include "en/tc_tun.h" @@ -109,6 +110,30 @@ static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) return 0; } +static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = gre_calc_hlen(tun_key->tun_flags); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + *ptr = tun_id; + } + + return 0; +} + static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { @@ -118,6 +143,9 @@ static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { *ip_proto = IPPROTO_UDP; err = mlx5e_gen_vxlan_header(buf, key); + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *ip_proto = IPPROTO_GRE; + err = mlx5e_gen_gre_header(buf, key); } else { pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" , e->tunnel_type); @@ -358,6 +386,9 @@ int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) { if (netif_is_vxlan(tunnel_dev)) return MLX5E_TC_TUNNEL_TYPE_VXLAN; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_GRETAP; else return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; } @@ -370,6 +401,9 @@ bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) return true; + else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && + 
MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + return true; else return false; } @@ -394,6 +428,9 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, } e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; e->tunnel_hlen = VXLAN_HLEN; + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); } else { e->reformat_type = -1; e->tunnel_hlen = -1; @@ -472,6 +509,53 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return 0; } +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *outer_headers_c, + void *outer_headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "GRE HW offloading is not supported"); + netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ip_protocol, IPPROTO_GRE); + + /* gre protocol*/ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *mask = NULL; + struct flow_dissector_key_keyid *key = NULL; + + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(mask->keyid)); + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(key->keyid)); + } + + return 0; +} + int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -486,6 +570,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, headers_c, headers_v); + } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + err = mlx5e_tc_tun_parse_gretap(priv, spec, f, + headers_c, headers_v); } else { netdev_warn(priv->netdev, "decapsulation offload is not supported for %s net device (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index ad4fc93b17a1..706ce7bf15e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -13,7 +13,8 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, - MLX5E_TC_TUNNEL_TYPE_VXLAN + MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GRETAP }; int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, -- cgit v1.2.3 From 69bd48404f251b9c45a15799fdcfc87a7ad6ab8a Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Tue, 6 Nov 2018 09:58:37 +0200 Subject: net/sched: Remove egdev mechanism The egdev mechanism was replaced by the TC indirect block notifications platform. 
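For reference, the replacement path is the indirect block facility: instead of registering an egdev callback per egress device, a driver registers for TC block bind notifications on tunnel devices it does not own. A minimal sketch of that registration, assuming the __tc_indr_block_cb_register()/__tc_indr_block_cb_unregister() interface from the indirect block series (declarations in net/pkt_cls.h); the my_* names are illustrative and not part of this patch:

	/* Handler for TC blocks bound to a foreign (e.g. tunnel) netdev.
	 * A real driver would bind its filter callbacks to the offloaded
	 * block on TC_SETUP_BLOCK; only the dispatch shape is shown here.
	 */
	static int my_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
				       enum tc_setup_type type, void *type_data)
	{
		switch (type) {
		case TC_SETUP_BLOCK:
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}

	static int my_indr_register(struct net_device *netdev, void *priv)
	{
		/* replaces tc_setup_cb_egdev_register(); the __ variant
		 * assumes the caller already holds RTNL
		 */
		return __tc_indr_block_cb_register(netdev, priv,
						   my_indr_setup_tc_cb, priv);
	}

	static void my_indr_unregister(struct net_device *netdev, void *priv)
	{
		__tc_indr_block_cb_unregister(netdev, my_indr_setup_tc_cb,
					      priv);
	}
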
Signed-off-by: Oz Shlomo Reviewed-by: Eli Britstein Reviewed-by: Jiri Pirko Cc: John Hurley Cc: Jakub Kicinski Signed-off-by: Saeed Mahameed --- include/net/act_api.h | 30 ------- net/sched/act_api.c | 221 -------------------------------------------------- net/sched/cls_api.c | 47 +---------- 3 files changed, 1 insertion(+), 297 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 05c7df41d737..dbc795ec659e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -194,35 +194,5 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, #endif } -#ifdef CONFIG_NET_CLS_ACT -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv); -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv); -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop); -#else -static inline -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - return 0; -} - -static inline -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ -} - -static inline -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop) -{ - return 0; -} -#endif #endif diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9c1b0729aebf..d4b8355737d8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -21,8 +21,6 @@ #include #include #include -#include -#include #include #include #include @@ -1522,227 +1520,8 @@ out_module_put: return skb->len; } -struct tcf_action_net { - struct rhashtable egdev_ht; -}; - -static unsigned int tcf_action_net_id; - -struct tcf_action_egdev_cb { - struct list_head list; - tc_setup_cb_t *cb; - void *cb_priv; -}; - -struct tcf_action_egdev { - struct rhash_head ht_node; - const struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; -}; - -static const struct rhashtable_params tcf_action_egdev_ht_params = { - .key_offset = offsetof(struct tcf_action_egdev, dev), - .head_offset = offsetof(struct tcf_action_egdev, ht_node), - .key_len = sizeof(const struct net_device *), -}; - -static struct tcf_action_egdev * -tcf_action_egdev_lookup(const struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); - - return rhashtable_lookup_fast(&tan->egdev_ht, &dev, - tcf_action_egdev_ht_params); -} - -static struct tcf_action_egdev * -tcf_action_egdev_get(const struct net_device *dev) -{ - struct tcf_action_egdev *egdev; - struct tcf_action_net *tan; - - egdev = tcf_action_egdev_lookup(dev); - if (egdev) - goto inc_ref; - - egdev = kzalloc(sizeof(*egdev), GFP_KERNEL); - if (!egdev) - return NULL; - INIT_LIST_HEAD(&egdev->cb_list); - egdev->dev = dev; - tan = net_generic(dev_net(dev), tcf_action_net_id); - rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, - tcf_action_egdev_ht_params); - -inc_ref: - egdev->refcnt++; - return egdev; -} - -static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) -{ - struct tcf_action_net *tan; - - if (--egdev->refcnt) - return; - tan = net_generic(dev_net(egdev->dev), tcf_action_net_id); - rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node, - tcf_action_egdev_ht_params); - kfree(egdev); -} - -static struct tcf_action_egdev_cb * -tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev, - tc_setup_cb_t *cb, void *cb_priv) -{ - 
struct tcf_action_egdev_cb *egdev_cb; - - list_for_each_entry(egdev_cb, &egdev->cb_list, list) - if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv) - return egdev_cb; - return NULL; -} - -static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev, - enum tc_setup_type type, - void *type_data, bool err_stop) -{ - struct tcf_action_egdev_cb *egdev_cb; - int ok_count = 0; - int err; - - list_for_each_entry(egdev_cb, &egdev->cb_list, list) { - err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv); - if (err) { - if (err_stop) - return err; - } else { - ok_count++; - } - } - return ok_count; -} - -static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev_cb *egdev_cb; - - egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); - if (WARN_ON(egdev_cb)) - return -EEXIST; - egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL); - if (!egdev_cb) - return -ENOMEM; - egdev_cb->cb = cb; - egdev_cb->cb_priv = cb_priv; - list_add(&egdev_cb->list, &egdev->cb_list); - return 0; -} - -static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev_cb *egdev_cb; - - egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); - if (WARN_ON(!egdev_cb)) - return; - list_del(&egdev_cb->list); - kfree(egdev_cb); -} - -static int __tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev); - int err; - - if (!egdev) - return -ENOMEM; - err = tcf_action_egdev_cb_add(egdev, cb, cb_priv); - if (err) - goto err_cb_add; - return 0; - -err_cb_add: - tcf_action_egdev_put(egdev); - return err; -} -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - int err; - - rtnl_lock(); - err = __tc_setup_cb_egdev_register(dev, cb, cb_priv); - rtnl_unlock(); - return err; -} -EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register); - -static void __tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); - - if (WARN_ON(!egdev)) - return; - tcf_action_egdev_cb_del(egdev, cb, cb_priv); - tcf_action_egdev_put(egdev); -} -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - rtnl_lock(); - __tc_setup_cb_egdev_unregister(dev, cb, cb_priv); - rtnl_unlock(); -} -EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister); - -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop) -{ - struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); - - if (!egdev) - return 0; - return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); -} -EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call); - -static __net_init int tcf_action_net_init(struct net *net) -{ - struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); - - return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params); -} - -static void __net_exit tcf_action_net_exit(struct net *net) -{ - struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); - - rhashtable_destroy(&tan->egdev_ht); -} - -static struct pernet_operations tcf_action_net_ops = { - .init = tcf_action_net_init, - .exit = tcf_action_net_exit, - .id = &tcf_action_net_id, - .size = sizeof(struct tcf_action_net), -}; - static int __init tc_action_init(void) { - int err; - - err = 
register_pernet_subsys(&tcf_action_net_ops); - if (err) - return err; - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d92f44ac4c39..6207f265b87c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2515,55 +2515,10 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, - enum tc_setup_type type, - void *type_data, bool err_stop) -{ - int ok_count = 0; -#ifdef CONFIG_NET_CLS_ACT - const struct tc_action *a; - struct net_device *dev; - int i, ret; - - if (!tcf_exts_has_actions(exts)) - return 0; - - for (i = 0; i < exts->nr_actions; i++) { - a = exts->actions[i]; - if (!a->ops->get_dev) - continue; - dev = a->ops->get_dev(a); - if (!dev) - continue; - ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); - a->ops->put_dev(dev); - if (ret < 0) - return ret; - ok_count += ret; - } -#endif - return ok_count; -} - int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, enum tc_setup_type type, void *type_data, bool err_stop) { - int ok_count; - int ret; - - ret = tcf_block_cb_call(block, type, type_data, err_stop); - if (ret < 0) - return ret; - ok_count = ret; - - if (!exts || ok_count) - return ok_count; - ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); - if (ret < 0) - return ret; - ok_count += ret; - - return ok_count; + return tcf_block_cb_call(block, type, type_data, err_stop); } EXPORT_SYMBOL(tc_setup_cb_call); -- cgit v1.2.3
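
Editor's note: with the egdev path removed, tc_setup_cb_call() above reduces to tcf_block_cb_call(), so the only driver offload entry point left is the per-block callback list populated from ndo_setup_tc(). A minimal sketch of that remaining path, assuming the tcf_block_cb_register()/tcf_block_cb_unregister() API from net/pkt_cls.h as it stood at the time; my_block_cb and my_setup_tc_block are illustrative, not code from these patches:

	static int my_block_cb(enum tc_setup_type type, void *type_data,
			       void *cb_priv)
	{
		/* per-filter offload requests (e.g. TC_SETUP_CLSFLOWER)
		 * arrive here once the block is bound
		 */
		switch (type) {
		case TC_SETUP_CLSFLOWER:
			return 0;	/* translate and program the rule */
		default:
			return -EOPNOTSUPP;
		}
	}

	static int my_setup_tc_block(struct net_device *dev,
				     struct tc_block_offload *f)
	{
		void *priv = netdev_priv(dev);

		if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
			return -EOPNOTSUPP;

		switch (f->command) {
		case TC_BLOCK_BIND:
			return tcf_block_cb_register(f->block, my_block_cb,
						     priv, priv, f->extack);
		case TC_BLOCK_UNBIND:
			tcf_block_cb_unregister(f->block, my_block_cb, priv);
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}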